####################################################################
#
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
# Usage Outline:
#
# STEP 1: initialize or re-initialize Formatter with user options
# Perl::Tidy::Formatter::check_options($rOpts);
#
# STEP 2: crate a tokenizer for the source stream
#
# STEP 3: create a formatter for the destination stream
# my $formatter = Perl::Tidy::Formatter->new(
# ...
# sink_object => $destination,
# ...
# );
#
# STEP 4: process each input line (see sub Perl::Tidy::process_single_case)
# while ( my $line = $tokenizer->get_line() ) {
# $formatter->write_line($line);
# }
#
# STEP 4: finish formatting
# $formatter->finish_formatting($severe_error);
#
#####################################################################
# Index...
# CODE SECTION 1: Preliminary code, global definitions and sub new
# sub new
# CODE SECTION 2: Some Basic Utilities
# CODE SECTION 3: Check and process options
# sub check_options
# CODE SECTION 4: Receive lines from the tokenizer
# sub write_line
# CODE SECTION 5: Pre-process the entire file
# sub finish_formatting
# CODE SECTION 6: Process line-by-line
# sub process_all_lines
# CODE SECTION 7: Process lines of code
# process_line_of_CODE
# CODE SECTION 8: Utilities for setting breakpoints
# sub set_forced_breakpoint
# CODE SECTION 9: Process batches of code
# sub grind_batch_of_CODE
# CODE SECTION 10: Code to break long statements
# sub break_long_lines
# CODE SECTION 11: Code to break long lists
# sub break_lists
# CODE SECTION 12: Code for setting indentation
# CODE SECTION 13: Preparing batch of lines for vertical alignment
# sub convey_batch_to_vertical_aligner
# CODE SECTION 14: Code for creating closing side comments
# sub add_closing_side_comment
# CODE SECTION 15: Summarize
# sub wrapup
#######################################################################
# CODE SECTION 1: Preliminary code and global definitions up to sub new
#######################################################################
package Perl::Tidy::Formatter;
use strict;
use warnings;
# DEVEL_MODE gets switched on during automated testing for extra checking
use constant DEVEL_MODE => 0;
use constant EMPTY_STRING => q{};
use constant SPACE => q{ };
use constant BACKSLASH => q{\\};
{ #<<< A non-indenting brace to contain all lexical variables
use Carp;
use English qw( -no_match_vars );
use List::Util qw( min max first ); # min, max first are in Perl 5.8
our $VERSION = '20250214';
# List of hash keys to prevent -duk from listing them.
# 'break-open-compact-parens' is an unimplemented option.
# 'Unicode::Collate::Locale' is in the data for scan_unique_keys
my @unique_hash_keys_uu =
qw( rOpts file_writer_object unlike isnt break-open-compact-parens }]
Unicode::Collate::Locale );
# The Tokenizer will be loaded with the Formatter
##use Perl::Tidy::Tokenizer; # for is_keyword()
sub AUTOLOAD {
# Catch any undefined sub calls so that we are sure to get
# some diagnostic information. This sub should never be called
# except for a programming error.
our $AUTOLOAD;
return if ( $AUTOLOAD =~ /\bDESTROY$/ );
my ( $pkg, $fname, $lno ) = caller();
my $my_package = __PACKAGE__;
print {*STDERR} <<EOM;
======================================================================
Error detected in package '$my_package', version $VERSION
Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
Called from package: '$pkg'
Called from File '$fname' at line '$lno'
This error is probably due to a recent programming change
======================================================================
EOM
exit 1;
} ## end sub AUTOLOAD
sub DESTROY {
my $self = shift;
_decrement_count();
return;
}
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
croak "unexpected return from Perl::Tidy::Die";
}
sub Warn {
my ($msg) = @_;
Perl::Tidy::Warn($msg);
return;
}
sub Fault {
my ($msg) = @_;
# This routine is called for errors that really should not occur
# except if there has been a bug introduced by a recent program change.
# Please add comments at calls to Fault to explain why the call
# should not occur, and where to look to fix it.
my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0);
my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2);
my $pkg = __PACKAGE__;
my $input_stream_name = get_input_stream_name();
Die(<<EOM);
==============================================================================
While operating on input stream with name: '$input_stream_name'
A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
$pkg reports VERSION='$VERSION'.
==============================================================================
EOM
croak "unexpected return from sub Die";
} ## end sub Fault
sub Fault_Warn {
my ($msg) = @_;
# This is the same as Fault except that it calls Warn instead of Die
# and returns.
my ( $package0_uu, $filename0_uu, $line0, $subroutine0_uu ) = caller(0);
my ( $package1_uu, $filename1, $line1, $subroutine1 ) = caller(1);
my ( $package2_uu, $filename2_uu, $line2_uu, $subroutine2 ) = caller(2);
my $input_stream_name = get_input_stream_name();
Warn(<<EOM);
==============================================================================
While operating on input stream with name: '$input_stream_name'
A fault was detected at line $line0 of sub '$subroutine1'
in file '$filename1'
which was called from line $line1 of sub '$subroutine2'
Message: '$msg'
This is probably an error introduced by a recent programming change.
Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
==============================================================================
EOM
return;
} ## end sub Fault_Warn
sub Exit {
my ($msg) = @_;
Perl::Tidy::Exit($msg);
croak "unexpected return from Perl::Tidy::Exit";
}
# Global variables ...
my (
#-----------------------------------------------------------------
# Section 1: Global variables which are either always constant or
# are constant after being configured by user-supplied
# parameters. They remain constant as a file is being processed.
# The INITIALIZER comment tells the sub responsible for initializing
# each variable. Failure to initialize or re-initialize a global
# variable can cause bugs which are hard to locate.
#-----------------------------------------------------------------
# INITIALIZER: sub check_options
$rOpts,
# short-cut option variables
# INITIALIZER: sub initialize_global_option_vars
$rOpts_add_newlines,
$rOpts_add_whitespace,
$rOpts_add_trailing_commas,
$rOpts_add_lone_trailing_commas,
$rOpts_blank_lines_after_opening_block,
$rOpts_block_brace_tightness,
$rOpts_block_brace_vertical_tightness,
$rOpts_brace_follower_vertical_tightness,
$rOpts_break_after_labels,
$rOpts_break_at_old_attribute_breakpoints,
$rOpts_break_at_old_comma_breakpoints,
$rOpts_break_at_old_keyword_breakpoints,
$rOpts_break_at_old_logical_breakpoints,
$rOpts_break_at_old_semicolon_breakpoints,
$rOpts_break_at_old_ternary_breakpoints,
$rOpts_break_open_compact_parens,
$rOpts_closing_side_comments,
$rOpts_closing_side_comment_else_flag,
$rOpts_closing_side_comment_maximum_text,
$rOpts_comma_arrow_breakpoints,
$rOpts_continuation_indentation,
$rOpts_cuddled_paren_brace,
$rOpts_delete_closing_side_comments,
$rOpts_delete_old_whitespace,
$rOpts_delete_side_comments,
$rOpts_delete_trailing_commas,
$rOpts_delete_lone_trailing_commas,
$rOpts_delete_weld_interfering_commas,
$rOpts_extended_continuation_indentation,
$rOpts_format_skipping,
$rOpts_freeze_whitespace,
$rOpts_function_paren_vertical_alignment,
$rOpts_fuzzy_line_length,
$rOpts_ignore_old_breakpoints,
$rOpts_ignore_side_comment_lengths,
$rOpts_ignore_perlcritic_comments,
$rOpts_indent_closing_brace,
$rOpts_indent_columns,
$rOpts_indent_leading_semicolon,
$rOpts_indent_only,
$rOpts_keep_interior_semicolons,
$rOpts_line_up_parentheses,
$rOpts_logical_padding,
$rOpts_maximum_consecutive_blank_lines,
$rOpts_maximum_fields_per_table,
$rOpts_maximum_line_length,
$rOpts_minimize_continuation_indentation,
$rOpts_one_line_block_semicolons,
$rOpts_opening_brace_always_on_right,
$rOpts_outdent_keywords,
$rOpts_outdent_labels,
$rOpts_outdent_long_comments,
$rOpts_outdent_long_quotes,
$rOpts_outdent_static_block_comments,
$rOpts_recombine,
$rOpts_qw_as_function,
$rOpts_short_concatenation_item_length,
$rOpts_space_prototype_paren,
$rOpts_space_signature_paren,
$rOpts_stack_closing_block_brace,
$rOpts_static_block_comments,
$rOpts_add_missing_else,
$rOpts_warn_missing_else,
$rOpts_tee_block_comments,
$rOpts_tee_pod,
$rOpts_tee_side_comments,
$rOpts_variable_maximum_line_length,
$rOpts_valign_code,
$rOpts_valign_side_comments,
$rOpts_valign_if_unless,
$rOpts_valign_wide_equals,
$rOpts_whitespace_cycle,
$rOpts_extended_block_tightness,
$rOpts_extended_line_up_parentheses,
$rOpts_warn_unique_keys_cutoff,
# Static hashes
# INITIALIZER: BEGIN block
%is_assignment,
%is_non_list_type,
%is_if_unless_and_or_last_next_redo_return,
%is_if_elsif_else_unless_while_until_for_foreach,
%is_if_unless_while_until_for_foreach,
%is_for_foreach,
%is_last_next_redo_return,
%is_if_unless,
%is_if_elsif,
%is_if_unless_elsif,
%is_if_unless_elsif_else,
%is_elsif_else,
%is_and_or,
%is_chain_operator,
%is_block_without_semicolon,
%ok_to_add_semicolon_for_block_type,
%is_opening_type,
%is_closing_type,
%is_opening_token,
%is_closing_token,
%is_ternary,
%is_equal_or_fat_comma,
%is_counted_type,
%is_opening_sequence_token,
%is_closing_sequence_token,
%matching_token,
%is_container_label_type,
%is_die_confess_croak_warn,
%is_my_our_local,
%is_soft_keep_break_type,
%is_indirect_object_taker,
@all_operators,
%is_do_follower,
%is_anon_sub_brace_follower,
%is_anon_sub_1_brace_follower,
%is_other_brace_follower,
%is_kwU,
%is_re_match_op,
%is_my_state_our,
%is_keyword_with_special_leading_term,
%is_s_y_m_slash,
%is_sigil,
# INITIALIZER: sub check_options
$controlled_comma_style,
# INITIALIZER: sub initialize_tightness_vars
%tightness,
# INITIALIZER: sub initialize_multiple_token_tightness
%multiple_token_tightness,
# INITIALIZER: initialize_old_breakpoint_controls
%keep_break_before_type,
%keep_break_after_type,
# INITIALIZER: initialize_container_indentation_options
%container_indentation_options,
# INITIALIZER: sub initialize_lpxl_lpil
%line_up_parentheses_control_hash,
$line_up_parentheses_control_is_lpxl,
# INITIALIZER: sub outdent_keyword
%outdent_keyword,
# INITIALIZER: sub initialize_keyword_paren_inner_tightness
%keyword_paren_inner_tightness,
# These can be modified by grep-alias-list
# INITIALIZER: sub initialize_grep_and_friends
%is_sort_map_grep,
%is_sort_map_grep_eval,
%is_sort_map_grep_eval_do,
%is_block_with_ci,
%is_keyword_returning_list,
%block_type_map, # initialized in BEGIN, but may be changed
%want_one_line_block, # may be changed in prepare_cuddled_block_types
# INITIALIZER: sub prepare_cuddled_block_types
$rcuddled_block_types,
# INITIALIZER: sub initialize_whitespace_hashes
%binary_ws_rules,
%want_left_space,
%want_right_space,
# INITIALIZER: sub initialize_bond_strength_hashes
%right_bond_strength,
%left_bond_strength,
# INITIALIZER: sub initialize_token_break_preferences
%want_break_before,
%break_before_container_types,
# INITIALIZER: sub initialize_space_after_keyword
%space_after_keyword,
# INITIALIZER: sub initialize_extended_block_tightness_list
%extended_block_tightness_list,
# INITIALIZED BY initialize_global_option_vars
%opening_vertical_tightness,
%closing_vertical_tightness,
%closing_token_indentation,
$some_closing_token_indentation,
%opening_token_right,
%stack_opening_token,
%stack_closing_token,
# INITIALIZER: sub initialize_weld_nested_exclusion_rules
%weld_nested_exclusion_rules,
# INITIALIZER: sub initialize_weld_fat_comma_rules
%weld_fat_comma_rules,
# INITIALIZER: sub initialize_trailing_comma_rules
%trailing_comma_rules,
# INITIALIZER: sub initialize_trailing_comma_break_rules
%trailing_comma_break_rules,
# INITIALIZER: sub initialize_interbracket_arrow_style
%interbracket_arrow_style,
# INITIALIZER: sub initialize_call_paren_style
%call_paren_style,
# INITIALIZER: sub initialize_pack_operator_types
%pack_operator_types,
# INITIALIZER: sub initialize_warn_variable_types
$rwarn_variable_types,
$ris_warn_variable_excluded_name,
# INITIALIZER: sub initialize_warn_mismatched_args
$rwarn_mismatched_arg_types,
$ris_warn_mismatched_arg_excluded_name,
# INITIALIZER: sub initialize_warn_mismatched_returns
$rwarn_mismatched_return_types,
$ris_warn_mismatched_return_excluded_name,
# regex patterns for text identification.
# Most can be configured by user parameters.
# Most are initialized in a sub make_**_pattern during configuration.
# INITIALIZER: sub make_sub_matching_pattern
$SUB_PATTERN,
$ASUB_PATTERN,
%matches_ASUB,
# INITIALIZER: make_static_block_comment_pattern
$static_block_comment_pattern,
# INITIALIZER: sub make_static_side_comment_pattern
$static_side_comment_pattern,
# INITIALIZER: make_format_skipping_pattern
$format_skipping_pattern_begin,
$format_skipping_pattern_end,
# INITIALIZER: sub make_non_indenting_brace_pattern
$non_indenting_brace_pattern,
# INITIALIZER: sub make_bl_pattern
$bl_exclusion_pattern,
# INITIALIZER: make_bl_pattern
$bl_pattern,
# INITIALIZER: sub make_bli_pattern
$bli_exclusion_pattern,
# INITIALIZER: sub make_bli_pattern
$bli_pattern,
# INITIALIZER: sub make_block_brace_vertical_tightness_pattern
$block_brace_vertical_tightness_pattern,
# INITIALIZER: sub make_blank_line_pattern
$blank_lines_after_opening_block_pattern,
$blank_lines_before_closing_block_pattern,
# INITIALIZER: sub make_keyword_group_list_pattern
$keyword_group_list_pattern,
$keyword_group_list_comment_pattern,
# INITIALIZER: sub initialize_keep_old_blank_lines_hash
%keep_old_blank_lines_exceptions,
# INITIALIZER: sub make_closing_side_comment_prefix
$closing_side_comment_prefix_pattern,
# INITIALIZER: sub make_closing_side_comment_list_pattern
$closing_side_comment_list_pattern,
$closing_side_comment_want_asub,
$closing_side_comment_exclusion_pattern,
# Table to efficiently find indentation and max line length
# from level.
# INITIALIZER: sub initialize_line_length_vars
@maximum_line_length_at_level,
@maximum_text_length_at_level,
$stress_level_alpha,
$stress_level_beta,
$high_stress_level,
# Total number of sequence items in a weld, for quick checks
# INITIALIZER: weld_containers
$total_weld_count,
#--------------------------------------------------------
# Section 2: Work arrays for the current batch of tokens.
#--------------------------------------------------------
# These are re-initialized for each batch of code
# INITIALIZER: sub initialize_batch_variables
$max_index_to_go,
@block_type_to_go,
@type_sequence_to_go,
@forced_breakpoint_to_go,
@token_lengths_to_go,
@summed_lengths_to_go,
@levels_to_go,
@leading_spaces_to_go,
@reduced_spaces_to_go,
@mate_index_to_go,
@ci_levels_to_go,
@nesting_depth_to_go,
@nobreak_to_go,
@old_breakpoint_to_go,
@tokens_to_go,
@K_to_go,
@types_to_go,
@inext_to_go,
@parent_seqno_to_go,
# forced breakpoint variables associated with each batch of code
$forced_breakpoint_count,
$forced_breakpoint_undo_count,
$index_max_forced_break,
);
BEGIN {
# Index names for token variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_CI_LEVEL_ => $i++,
_CUMULATIVE_LENGTH_ => $i++,
_LINE_INDEX_ => $i++,
_LEVEL_ => $i++,
_TOKEN_ => $i++,
_TOKEN_LENGTH_ => $i++,
_TYPE_ => $i++,
_TYPE_SEQUENCE_ => $i++,
# Number of token variables; must be last in list:
_NVARS => $i++,
};
} ## end BEGIN
BEGIN {
# Index names for $self variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_rlines_ => $i++,
_rLL_ => $i++,
_Klimit_ => $i++,
_rdepth_of_opening_seqno_ => $i++,
_rSS_ => $i++,
_rI_opening_ => $i++,
_rI_closing_ => $i++,
_rK_next_seqno_by_K_ => $i++,
_rblock_type_of_seqno_ => $i++,
_ris_asub_block_ => $i++,
_ris_sub_block_ => $i++,
_K_opening_container_ => $i++,
_K_closing_container_ => $i++,
_K_opening_ternary_ => $i++,
_K_closing_ternary_ => $i++,
_rK_sequenced_token_list_ => $i++,
_rtype_count_by_seqno_ => $i++,
_ris_function_call_paren_ => $i++,
_rlec_count_by_seqno_ => $i++,
_ris_broken_container_ => $i++,
_ris_permanently_broken_ => $i++,
_rblank_and_comment_count_ => $i++,
_rhas_list_ => $i++,
_rhas_broken_list_ => $i++,
_rhas_broken_list_with_lec_ => $i++,
_rfirst_comma_line_index_ => $i++,
_rhas_code_block_ => $i++,
_rhas_broken_code_block_ => $i++,
_rhas_ternary_ => $i++,
_ris_excluded_lp_container_ => $i++,
_rlp_object_by_seqno_ => $i++,
_rwant_reduced_ci_ => $i++,
_rno_xci_by_seqno_ => $i++,
_rbrace_left_ => $i++,
_ris_bli_container_ => $i++,
_rparent_of_seqno_ => $i++,
_rchildren_of_seqno_ => $i++,
_ris_list_by_seqno_ => $i++,
_ris_cuddled_closing_brace_ => $i++,
_rbreak_container_ => $i++,
_rshort_nested_ => $i++,
_length_function_ => $i++,
_is_encoded_data_ => $i++,
_fh_tee_ => $i++,
_sink_object_ => $i++,
_file_writer_object_ => $i++,
_vertical_aligner_object_ => $i++,
_logger_object_ => $i++,
_radjusted_levels_ => $i++,
_ris_special_identifier_token_ => $i++,
_last_output_short_opening_token_ => $i++,
_last_line_leading_type_ => $i++,
_last_line_leading_level_ => $i++,
_added_semicolon_count_ => $i++,
_first_added_semicolon_at_ => $i++,
_last_added_semicolon_at_ => $i++,
_deleted_semicolon_count_ => $i++,
_first_deleted_semicolon_at_ => $i++,
_last_deleted_semicolon_at_ => $i++,
_embedded_tab_count_ => $i++,
_first_embedded_tab_at_ => $i++,
_last_embedded_tab_at_ => $i++,
_first_tabbing_disagreement_ => $i++,
_last_tabbing_disagreement_ => $i++,
_tabbing_disagreement_count_ => $i++,
_in_tabbing_disagreement_ => $i++,
_first_brace_tabbing_disagreement_ => $i++,
_in_brace_tabbing_disagreement_ => $i++,
_saw_VERSION_in_this_file_ => $i++,
_saw_use_strict_ => $i++,
_saw_END_or_DATA_ => $i++,
_rK_weld_left_ => $i++,
_rK_weld_right_ => $i++,
_rweld_len_right_at_K_ => $i++,
_rspecial_side_comment_type_ => $i++,
_rseqno_controlling_my_ci_ => $i++,
_ris_seqno_controlling_ci_ => $i++,
_save_logfile_ => $i++,
_maximum_level_ => $i++,
_maximum_level_at_line_ => $i++,
_maximum_BLOCK_level_ => $i++,
_maximum_BLOCK_level_at_line_ => $i++,
_rKrange_code_without_comments_ => $i++,
_rbreak_before_Kfirst_ => $i++,
_rbreak_after_Klast_ => $i++,
_converged_ => $i++,
_want_second_iteration_ => $i++,
_rstarting_multiline_qw_seqno_by_K_ => $i++,
_rending_multiline_qw_seqno_by_K_ => $i++,
_rKrange_multiline_qw_by_seqno_ => $i++,
_rmultiline_qw_has_extra_level_ => $i++,
_ris_qwaf_by_seqno_ => $i++,
_rcollapsed_length_by_seqno_ => $i++,
_rbreak_before_container_by_seqno_ => $i++,
_roverride_cab3_ => $i++,
_ris_assigned_structure_ => $i++,
_ris_short_broken_eval_block_ => $i++,
_ris_bare_trailing_comma_by_seqno_ => $i++,
_rtightness_override_by_seqno_ => $i++,
_rseqno_non_indenting_brace_by_ix_ => $i++,
_rmax_vertical_tightness_ => $i++,
_no_vertical_tightness_flags_ => $i++,
_last_vt_type_ => $i++,
_rwant_arrow_before_seqno_ => $i++,
_rseqno_arrow_call_chain_start_ => $i++,
_rarrow_call_chain_ => $i++,
# these vars are defined after call to respace tokens:
_rK_package_list_ => $i++,
_rK_AT_underscore_by_sub_seqno_ => $i++,
_rK_first_self_by_sub_seqno_ => $i++,
_rK_bless_by_sub_seqno_ => $i++,
_rK_return_by_sub_seqno_ => $i++,
_rK_wantarray_by_sub_seqno_ => $i++,
_rK_sub_by_seqno_ => $i++,
_ris_my_sub_by_seqno_ => $i++,
_rsub_call_paren_info_by_seqno_ => $i++,
_rDOLLAR_underscore_by_sub_seqno_ => $i++,
_this_batch_ => $i++,
_LAST_SELF_INDEX_ => $i - 1,
};
} ## end BEGIN
BEGIN {
# Index names for variables stored in _this_batch_.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_starting_in_quote_ => $i++,
_ending_in_quote_ => $i++,
_is_static_block_comment_ => $i++,
_ri_first_ => $i++,
_ri_last_ => $i++,
_do_not_pad_ => $i++,
_peak_batch_size_ => $i++,
_batch_count_ => $i++,
_rix_seqno_controlling_ci_ => $i++,
_batch_CODE_type_ => $i++,
_ri_starting_one_line_block_ => $i++,
_runmatched_opening_indexes_ => $i++,
};
} ## end BEGIN
BEGIN {
# Sequence number assigned to the root of sequence tree.
# The minimum of the actual sequences numbers is 4, so we can use 1
use constant SEQ_ROOT => 1;
# Codes for insertion and deletion of blanks
use constant DELETE => 0;
use constant STABLE => 1;
use constant INSERT => 2;
# whitespace codes
use constant WS_YES => 1;
use constant WS_OPTIONAL => 0;
use constant WS_NO => -1;
# Token bond strengths.
use constant NO_BREAK => 10_000;
use constant VERY_STRONG => 100;
use constant STRONG => 2.1;
use constant NOMINAL => 1.1;
use constant WEAK => 0.8;
use constant VERY_WEAK => 0.55;
# values for testing indexes in output array
use constant UNDEFINED_INDEX => -1;
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
# This is the decimal range of printable characters in ASCII. It is used to
# make quick preliminary checks before resorting to using a regex.
use constant ORD_PRINTABLE_MIN => 33;
use constant ORD_PRINTABLE_MAX => 126;
# Initialize constant hashes ...
my @q;
@q = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= );
@is_assignment{@q} = (1) x scalar(@q);
# a hash needed by break_lists for efficiency:
push @q, qw{ ; < > ~ f };
@is_non_list_type{@q} = (1) x scalar(@q);
@q = qw( is if unless and or err last next redo return );
@is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
# These block types may have text between the keyword and opening
# curly. Note: 'else' does not, but must be included to allow trailing
# if/elsif text to be appended.
# patch for SWITCH/CASE: added 'case' and 'when'
@q = qw( if elsif else unless while until for foreach case when catch );
@is_if_elsif_else_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
# These can either have the BLOCK form or trailing modifier form:
@q = qw( if unless while until for foreach );
@is_if_unless_while_until_for_foreach{@q} =
(1) x scalar(@q);
# These can have several forms
@q = qw( for foreach );
@is_for_foreach{@q} = (1) x scalar(@q);
@q = qw( last next redo return );
@is_last_next_redo_return{@q} = (1) x scalar(@q);
# Map related block names into a common name to allow vertical alignment
# used by sub make_alignment_patterns. Note: this is normally unchanged,
# but it contains 'grep' and can be re-initialized in
# sub initialize_grep_and_friends in a testing mode.
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
@q = qw( if unless );
@is_if_unless{@q} = (1) x scalar(@q);
@q = qw( if elsif );
@is_if_elsif{@q} = (1) x scalar(@q);
@q = qw( if unless elsif );
@is_if_unless_elsif{@q} = (1) x scalar(@q);
@q = qw( if unless elsif else );
@is_if_unless_elsif_else{@q} = (1) x scalar(@q);
@q = qw( elsif else );
@is_elsif_else{@q} = (1) x scalar(@q);
@q = qw( and or err );
@is_and_or{@q} = (1) x scalar(@q);
# Identify certain operators which often occur in chains.
# Note: the minus (-) causes a side effect of padding of the first line in
# something like this (by sub set_logical_padding):
# Checkbutton => 'Transmission checked',
# -variable => \$TRANS
# This usually improves appearance so it seems ok.
@q = qw( && || and or : ? . + - * / );
@is_chain_operator{@q} = (1) x scalar(@q);
# Operators that the user can request break before or after.
# Note that some are keywords
@all_operators = qw{
% + - * / x != == >= <= =~ !~ < > | &
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
. : ? && || and or err xor
};
# We can remove semicolons after blocks preceded by these keywords
@q = qw(
BEGIN END CHECK INIT AUTOLOAD DESTROY
UNITCHECK continue if elsif else unless
while until for foreach given when
default
);
@is_block_without_semicolon{@q} = (1) x scalar(@q);
# We will allow semicolons to be added within these block types
# as well as sub and package blocks.
# NOTES:
# 1. Note that these keywords are omitted:
# switch case given when default sort map grep
# 2. It is also ok to add for sub and package blocks and a labeled block
# 3. But not okay for other perltidy types including:
# { } ; G t
# 4. Test files: blktype.t, blktype1.t, semicolon.t
@q = qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif
else unless do while until eval for foreach );
@ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
# 'L' is token for opening { at hash key
@q = qw< L { ( [ >;
@is_opening_type{@q} = (1) x scalar(@q);
# 'R' is token for closing } at hash key
@q = qw< R } ) ] >;
@is_closing_type{@q} = (1) x scalar(@q);
@q = qw< { ( [ >;
@is_opening_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] >;
@is_closing_token{@q} = (1) x scalar(@q);
@q = qw( ? : );
@is_ternary{@q} = (1) x scalar(@q);
@q = qw< { ( [ ? >;
@is_opening_sequence_token{@q} = (1) x scalar(@q);
@q = qw< } ) ] : >;
@is_closing_sequence_token{@q} = (1) x scalar(@q);
%matching_token = (
'{' => '}',
'(' => ')',
'[' => ']',
'?' => ':',
'}' => '{',
')' => '(',
']' => '[',
':' => '?',
);
# a hash needed by sub break_lists for labeling containers
@q = qw( k => && || ? : . );
@is_container_label_type{@q} = (1) x scalar(@q);
@q = qw( die confess croak warn );
@is_die_confess_croak_warn{@q} = (1) x scalar(@q);
@q = qw( my our local );
@is_my_our_local{@q} = (1) x scalar(@q);
# Braces -bbht etc must follow these. Note: experimentation with
# including a simple comma shows that it adds little and can lead
# to poor formatting in complex lists.
@q = qw( = => );
@is_equal_or_fat_comma{@q} = (1) x scalar(@q);
@q = qw( => ; h f );
push @q, ',';
@is_counted_type{@q} = (1) x scalar(@q);
# Tokens where --keep-old-break-xxx flags make soft breaks instead
# of hard breaks. See b1433 and b1436.
# NOTE: $type is used as the hash key for now; if other container tokens
# are added it might be necessary to use a token/type mixture.
@q = qw# -> ? : && || + - / * #;
@is_soft_keep_break_type{@q} = (1) x scalar(@q);
# these functions allow an identifier in the indirect object slot
@q = qw( print printf sort exec system say );
@is_indirect_object_taker{@q} = (1) x scalar(@q);
# Define here tokens which may follow the closing brace of a do statement
# on the same line, as in:
# } while ( $something);
my @dof = qw( until while unless if ; : );
push @dof, ',';
@is_do_follower{@dof} = (1) x scalar(@dof);
# what can follow a multi-line anonymous sub definition closing curly:
my @asf = qw# ; : => or and && || ~~ !~~ ) #;
push @asf, ',';
@is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
# what can follow a one-line anonymous sub closing curly:
# one-line anonymous subs also have ']' here...
# see tk3.t and PP.pm
my @asf1 = qw# ; : => or and && || ) ] ~~ !~~ #;
push @asf1, ',';
@is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
# What can follow a closing curly of a block
# which is not an if/elsif/else/do/sort/map/grep/eval/sub
# Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
my @obf = qw# ; : => or and && || ) #;
push @obf, ',';
@is_other_brace_follower{@obf} = (1) x scalar(@obf);
# 'k'=builtin keyword, 'U'=user defined sub, 'w'=unknown bareword
@q = qw( k w U );
@is_kwU{@q} = (1) x scalar(@q);
# regular expression match operators
@q = qw( =~ !~);
@is_re_match_op{@q} = (1) x scalar(@q);
@q = qw ( my state our );
@is_my_state_our{@q} = (1) x scalar(@q);
# These keywords have prototypes which allow a special leading item
# followed by a list
@q =
qw( chmod formline grep join kill map pack printf push sprintf unshift );
@is_keyword_with_special_leading_term{@q} = (1) x scalar(@q);
# used to check for certain token quote types
@q = qw( s y m / );
@is_s_y_m_slash{@q} = (1) x scalar(@q);
@q = qw( $ & % * @ );
@is_sigil{@q} = (1) x scalar(@q);
} ## end BEGIN
{ ## begin closure to count instances
# methods to count instances
my $_count = 0;
sub _increment_count { return ++$_count }
sub _decrement_count { return --$_count }
} ## end closure to count instances
sub new {
my ( $class, @arglist ) = @_;
if ( @arglist % 2 ) { croak "Odd number of items in arg hash list\n" }
# we are given an object with a write_line() method to take lines
my %defaults = (
sink_object => undef,
diagnostics_object => undef,
logger_object => undef,
length_function => undef,
is_encoded_data => EMPTY_STRING,
fh_tee => undef,
);
my %args = ( %defaults, @arglist );
my $length_function = $args{length_function};
my $is_encoded_data = $args{is_encoded_data};
my $fh_tee = $args{fh_tee};
my $logger_object = $args{logger_object};
my $diagnostics_object = $args{diagnostics_object};
# we create another object with a get_line() and peek_ahead() method
my $sink_object = $args{sink_object};
my $file_writer_object =
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
# initialize closure variables...
set_logger_object($logger_object);
set_diagnostics_object($diagnostics_object);
initialize_lp_vars();
initialize_csc_vars();
initialize_break_lists();
initialize_undo_ci();
initialize_process_line_of_CODE();
initialize_grind_batch_of_CODE();
initialize_get_final_indentation();
initialize_postponed_breakpoint();
initialize_batch_variables();
initialize_write_line();
my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
rOpts => $rOpts,
file_writer_object => $file_writer_object,
logger_object => $logger_object,
diagnostics_object => $diagnostics_object,
);
write_logfile_entry("\nStarting tokenization pass...\n");
if ( $rOpts->{'entab-leading-whitespace'} ) {
write_logfile_entry(
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
);
}
elsif ( $rOpts->{'tabs'} ) {
write_logfile_entry("Indentation will be with a tab character\n");
}
else {
write_logfile_entry(
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
# Initialize the $self array reference.
# To add an item, first add a constant index in the BEGIN block above.
my $self = [];
bless $self, $class;
# Basic data structures...
$self->[_rlines_] = []; # = ref to array of lines of the file
# 'rLL' = reference to the continuous liner array of all tokens in a file.
# 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
# 'LL' stuck because it is easy to type. The 'rLL' array is updated
# by sub 'respace_tokens' during reformatting. The indexes in 'rLL' begin
# with '$K' by convention.
$self->[_rLL_] = [];
$self->[_Klimit_] = undef; # = maximum K index for rLL.
# Indexes into the rLL list
$self->[_K_opening_container_] = {};
$self->[_K_closing_container_] = {};
$self->[_K_opening_ternary_] = {};
$self->[_K_closing_ternary_] = {};
# A list of index K of sequenced tokens to allow loops over them all
$self->[_rK_sequenced_token_list_] = [];
# 'rSS' is the 'Signed Sequence' list, a continuous list of all sequence
# numbers with + or - indicating opening or closing. This list represents
# the entire container tree and is invariant under reformatting. It can be
# used to quickly travel through the tree. Indexes in the rSS array begin
# with '$I' by convention.
$self->[_rSS_] = [];
$self->[_rI_opening_] = [];
$self->[_rI_closing_] = [];
$self->[_rK_next_seqno_by_K_] = [];
# Arrays to help traverse the tree
$self->[_rdepth_of_opening_seqno_] = [];
$self->[_rblock_type_of_seqno_] = {};
$self->[_ris_asub_block_] = {};
$self->[_ris_sub_block_] = {};
# Variables for --warn-mismatched-args and
# --dump-mismatched-args
# --dump-mismatched-returns
# --warn-mismatched-returns
$self->[_rK_package_list_] = [];
$self->[_rK_AT_underscore_by_sub_seqno_] = {};
$self->[_rK_first_self_by_sub_seqno_] = {};
$self->[_rK_bless_by_sub_seqno_] = {};
$self->[_rK_return_by_sub_seqno_] = {};
$self->[_rK_wantarray_by_sub_seqno_] = {};
$self->[_rsub_call_paren_info_by_seqno_] = {};
$self->[_rDOLLAR_underscore_by_sub_seqno_] = {};
$self->[_rK_sub_by_seqno_] = {};
$self->[_ris_my_sub_by_seqno_] = {};
$self->[_this_batch_] = [];
# Mostly list characteristics and processing flags
$self->[_rtype_count_by_seqno_] = {};
$self->[_ris_function_call_paren_] = {};
$self->[_rlec_count_by_seqno_] = {};
$self->[_ris_broken_container_] = {};
$self->[_ris_permanently_broken_] = {};
$self->[_rblank_and_comment_count_] = {};
$self->[_rhas_list_] = {};
$self->[_rhas_broken_list_] = {};
$self->[_rhas_broken_list_with_lec_] = {};
$self->[_rfirst_comma_line_index_] = {};
$self->[_rhas_code_block_] = {};
$self->[_rhas_broken_code_block_] = {};
$self->[_rhas_ternary_] = {};
$self->[_ris_excluded_lp_container_] = {};
$self->[_rlp_object_by_seqno_] = {};
$self->[_rwant_reduced_ci_] = {};
$self->[_rno_xci_by_seqno_] = {};
$self->[_rbrace_left_] = {};
$self->[_ris_bli_container_] = {};
$self->[_rparent_of_seqno_] = {};
$self->[_rchildren_of_seqno_] = {};
$self->[_ris_list_by_seqno_] = {};
$self->[_ris_cuddled_closing_brace_] = {};
$self->[_rbreak_container_] = {}; # prevent one-line blocks
$self->[_rshort_nested_] = {}; # blocks not forced open
$self->[_length_function_] = $length_function;
$self->[_is_encoded_data_] = $is_encoded_data;
# Some objects...
$self->[_fh_tee_] = $fh_tee;
$self->[_sink_object_] = $sink_object;
$self->[_file_writer_object_] = $file_writer_object;
$self->[_vertical_aligner_object_] = $vertical_aligner_object;
$self->[_logger_object_] = $logger_object;
# Memory of processed text...
$self->[_ris_special_identifier_token_] = {};
$self->[_last_line_leading_level_] = 0;
$self->[_last_line_leading_type_] = '#';
$self->[_last_output_short_opening_token_] = 0;
$self->[_added_semicolon_count_] = 0;
$self->[_first_added_semicolon_at_] = 0;
$self->[_last_added_semicolon_at_] = 0;
$self->[_deleted_semicolon_count_] = 0;
$self->[_first_deleted_semicolon_at_] = 0;
$self->[_last_deleted_semicolon_at_] = 0;
$self->[_embedded_tab_count_] = 0;
$self->[_first_embedded_tab_at_] = 0;
$self->[_last_embedded_tab_at_] = 0;
$self->[_first_tabbing_disagreement_] = 0;
$self->[_last_tabbing_disagreement_] = 0;
$self->[_tabbing_disagreement_count_] = 0;
$self->[_in_tabbing_disagreement_] = 0;
$self->[_saw_VERSION_in_this_file_] = !$rOpts->{'pass-version-line'};
$self->[_saw_use_strict_] = 0;
$self->[_saw_END_or_DATA_] = 0;
$self->[_first_brace_tabbing_disagreement_] = undef;
$self->[_in_brace_tabbing_disagreement_] = undef;
# Hashes related to container welding...
$self->[_radjusted_levels_] = [];
# Weld data structures
$self->[_rK_weld_left_] = {};
$self->[_rK_weld_right_] = {};
$self->[_rweld_len_right_at_K_] = {};
# -xci stuff
$self->[_rseqno_controlling_my_ci_] = {};
$self->[_ris_seqno_controlling_ci_] = {};
$self->[_rspecial_side_comment_type_] = {};
$self->[_maximum_level_] = 0;
$self->[_maximum_level_at_line_] = 0;
$self->[_maximum_BLOCK_level_] = 0;
$self->[_maximum_BLOCK_level_at_line_] = 0;
$self->[_rKrange_code_without_comments_] = [];
$self->[_rbreak_before_Kfirst_] = {};
$self->[_rbreak_after_Klast_] = {};
$self->[_converged_] = 0;
$self->[_want_second_iteration_] = 0;
# qw stuff
$self->[_rstarting_multiline_qw_seqno_by_K_] = {};
$self->[_rending_multiline_qw_seqno_by_K_] = {};
$self->[_rKrange_multiline_qw_by_seqno_] = {};
$self->[_rmultiline_qw_has_extra_level_] = {};
$self->[_ris_qwaf_by_seqno_] = {};
$self->[_rcollapsed_length_by_seqno_] = {};
$self->[_rbreak_before_container_by_seqno_] = {};
$self->[_roverride_cab3_] = {};
$self->[_ris_assigned_structure_] = {};
$self->[_ris_short_broken_eval_block_] = {};
$self->[_ris_bare_trailing_comma_by_seqno_] = {};
$self->[_rtightness_override_by_seqno_] = {};
$self->[_rseqno_non_indenting_brace_by_ix_] = {};
$self->[_rmax_vertical_tightness_] = {};
$self->[_no_vertical_tightness_flags_] = 0;
$self->[_last_vt_type_] = 0;
$self->[_rwant_arrow_before_seqno_] = {};
$self->[_rseqno_arrow_call_chain_start_] = {};
$self->[_rarrow_call_chain_] = {};
$self->[_save_logfile_] =
defined($logger_object) && $logger_object->get_save_logfile();
# Be sure all variables in $self have been initialized above. To find the
# correspondence of index numbers and array names, copy a list to a file
# and use the unix 'nl' command to number lines 1..
if (DEVEL_MODE) {
my @non_existant;
foreach ( 0 .. _LAST_SELF_INDEX_ ) {
if ( !exists $self->[$_] ) {
push @non_existant, $_;
}
}
if (@non_existant) {
Fault("These indexes in self not initialized: (@non_existant)\n");
}
}
# Safety check..this is not a class yet
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
return $self;
} ## end sub new
######################################
# CODE SECTION 2: Some Basic Utilities
######################################
sub check_rLL {
# Verify that the rLL array has not been auto-vivified
my ( $self, $msg ) = @_;
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $num = @{$rLL};
if ( ( defined($Klimit) && $Klimit != $num - 1 )
|| ( !defined($Klimit) && $num > 0 ) )
{
# This fault can occur if the array has been accessed for an index
# greater than $Klimit, which is the last token index. Just accessing
# the array above index $Klimit, not setting a value, can cause @rLL to
# increase beyond $Klimit. If this occurs, the problem can be located
# by making calls to this routine at different locations in
# sub 'finish_formatting'.
$Klimit = 'undef' if ( !defined($Klimit) );
$msg = EMPTY_STRING unless $msg;
Fault("$msg ERROR: rLL has num=$num but Klimit='$Klimit'\n");
}
return;
} ## end sub check_rLL
sub check_keys {
my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
# Check the keys of a hash:
# $rtest = ref to hash to test
# $rvalid = ref to hash with valid keys
# $msg = a message to write in case of error
# $exact_match defines the type of check:
# = false: test hash must not have unknown key
# = true: test hash must have exactly same keys as known hash
my @unknown_keys =
grep { !exists $rvalid->{$_} } keys %{$rtest};
my @missing_keys =
grep { !exists $rtest->{$_} } keys %{$rvalid};
my $error = @unknown_keys;
if ($exact_match) { $error ||= @missing_keys }
if ($error) {
local $LIST_SEPARATOR = ')(';
my @expected_keys = sort keys %{$rvalid};
@unknown_keys = sort @unknown_keys;
Fault(<<EOM);
------------------------------------------------------------------------
Program error detected checking hash keys
Message is: '$msg'
Expected keys: (@expected_keys)
Unknown key(s): (@unknown_keys)
Missing key(s): (@missing_keys)
------------------------------------------------------------------------
EOM
}
return;
} ## end sub check_keys
sub check_token_array {
my $self = shift;
#--------------
# Check @{$rLL}
#--------------
# Check for errors in the array of tokens. This is only called
# when the DEVEL_MODE flag is set, so this Fault will only occur
# during code development.
my $rLL = $self->[_rLL_];
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
my $nvars = @{ $rLL->[$KK] };
if ( $nvars != _NVARS ) {
my $NVARS = _NVARS;
my $type = $rLL->[$KK]->[_TYPE_];
$type = '*' unless defined($type);
# The number of variables per token node is _NVARS and was set when
# the array indexes were generated. So if the number of variables
# is different we have done something wrong, like not store all of
# them in sub 'write_line' when they were received from the
# tokenizer.
Fault(
"number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
);
}
foreach my $var ( _TOKEN_, _TYPE_ ) {
if ( !defined( $rLL->[$KK]->[$var] ) ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
# This is a simple check that each token has some basic
# variables. In other words, that there are no holes in the
# array of tokens. Sub 'write_line' pushes tokens into the
# $rLL array, so this should guarantee no gaps.
Fault("Undefined variable $var for K=$KK, line=$iline\n");
}
}
}
#---------------------------------
# Check $rK_next_seqno_by_K->[$KK]
#---------------------------------
my $Klimit = @{$rLL} - 1;
my $K_last_seqno;
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
foreach my $KK ( 0 .. $Klimit ) {
my $K_next_seqno = $rK_next_seqno_by_K->[$KK];
if ( !defined($K_next_seqno) ) { $K_last_seqno = $KK; last }
if ( $K_next_seqno <= $KK || $K_next_seqno > $Klimit ) {
Fault(<<EOM);
Error detected in array rK_next_seqno_by_K with limit K=$Klimit:
at K=$KK the next seqno is $K_next_seqno
K_next_seqno = $K_next_seqno is Out of bounds
EOM
}
if ( !$rLL->[$K_next_seqno]->[_TYPE_SEQUENCE_] ) {
Fault(<<EOM);
Error detected in array rK_next_seqno_by_K with limit K=$Klimit:
at K=$KK the next seqno is $K_next_seqno:
K_next_seqno = $K_next_seqno does not have a sequence number
EOM
}
}
# upon hitting an undef, the remaining values should also be undef
if ( defined($K_last_seqno) ) {
foreach my $KK ( $K_last_seqno + 1 .. $Klimit ) {
my $Ktest = $rK_next_seqno_by_K->[$KK];
next if ( !defined($Ktest) );
Fault(<<EOM);
Error detected in array rK_next_seqno_by_K with limit K=$Klimit
with first undef at $K_last_seqno
at K=$KK the next seqno is defined and is $Ktest
EOM
}
}
#-----------------------------
# Check hash $rparent_of_seqno
#-----------------------------
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
foreach my $seqno ( keys %{$rparent_of_seqno} ) {
# parent sequence numbers must always be less
my $seqno_parent = $rparent_of_seqno->{$seqno};
if ( $seqno_parent >= $seqno ) {
Fault(<<EOM);
Error detected in hash rparent_of_seqno:
The parent of seqno=$seqno is $seqno_parent but it should be less
EOM
}
}
return;
} ## end sub check_token_array
{ ## begin closure check_line_hashes
# This code checks that no auto-vivification occurs in the 'line' hash
my %valid_line_hash;
BEGIN {
# These keys are defined for each line in the formatter
# Each line must have exactly these quantities
my @valid_line_keys = qw(
_curly_brace_depth
_ending_in_quote
_guessed_indentation_level
_line_number
_line_text
_line_type
_paren_depth
_rK_range
_square_bracket_depth
_starting_in_quote
_ended_in_blank_token
_code_type
_ci_level_0
_level_0
_nesting_blocks_0
_nesting_tokens_0
);
@valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
} ## end BEGIN
sub check_line_hashes {
my $self = shift;
my $rlines = $self->[_rlines_];
# Note that the keys ending in _0 are only required when a logfile
# is being saved, so we will just check for unknown keys, but not
# require an exact match.
foreach my $rline ( @{$rlines} ) {
my $iline = $rline->{_line_number};
my $line_type = $rline->{_line_type};
check_keys( $rline, \%valid_line_hash,
"Checkpoint: line number =$iline, line_type=$line_type", 0 );
}
return;
} ## end sub check_line_hashes
} ## end closure check_line_hashes
{ ## begin closure for logger routines
my $logger_object;
# Called once per file to initialize the logger object
sub set_logger_object {
$logger_object = shift;
return;
}
sub get_input_stream_name {
my $input_stream_name = EMPTY_STRING;
if ($logger_object) {
$input_stream_name = $logger_object->get_input_stream_name();
}
return $input_stream_name;
} ## end sub get_input_stream_name
# interface to Perl::Tidy::Logger routines
sub warning {
my ( $msg, ($msg_line_number) ) = @_;
# Issue a warning message
# Given:
# $msg = text of warning
# $msg_line_number = optional line number prefix
if ($logger_object) {
$logger_object->warning( $msg, $msg_line_number );
}
return;
} ## end sub warning
sub complain {
my ( $msg, ($msg_line_number) ) = @_;
# Issue a complaint message
# Given:
# $msg = text of complaint
# $msg_line_number = optional line number prefix
if ($logger_object) {
$logger_object->complain( $msg, $msg_line_number );
}
return;
} ## end sub complain
sub write_logfile_entry {
my @msg = @_;
if ($logger_object) {
$logger_object->write_logfile_entry(@msg);
}
return;
} ## end sub write_logfile_entry
sub get_saw_brace_error {
if ($logger_object) {
return $logger_object->get_saw_brace_error();
}
return;
} ## end sub get_saw_brace_error
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
return;
} ## end sub we_are_at_the_last_line
} ## end closure for logger routines
{ ## begin closure for diagnostics routines
my $diagnostics_object;
# Called once per file to initialize the diagnostics object
sub set_diagnostics_object {
$diagnostics_object = shift;
return;
}
# Available for debugging but not currently used:
sub write_diagnostics {
my ( $msg, $line_number ) = @_;
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics( $msg, $line_number );
}
return;
} ## end sub write_diagnostics
} ## end closure for diagnostics routines
sub get_convergence_check {
my ($self) = @_;
return $self->[_converged_];
}
sub want_second_iteration {
my ($self) = @_;
return $self->[_want_second_iteration_];
}
sub get_output_line_number {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
return $vao->get_output_line_number();
}
sub want_blank_line {
my $self = shift;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->want_blank_line();
return;
} ## end sub want_blank_line
sub write_unindented_line {
my ( $self, $line ) = @_;
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_line($line);
return;
} ## end sub write_unindented_line
sub dump_verbatim {
my $self = shift;
# Dump the input file to the output verbatim. This is called when
# there is a severe error and formatted output cannot be made.
my $rlines = $self->[_rlines_];
foreach my $line ( @{$rlines} ) {
my $input_line = $line->{_line_text};
$self->write_unindented_line($input_line);
}
return;
} ## end sub dump_verbatim
sub consecutive_nonblank_lines {
my ($self) = @_;
my $file_writer_object = $self->[_file_writer_object_];
my $vao = $self->[_vertical_aligner_object_];
return $file_writer_object->get_consecutive_nonblank_lines() +
$vao->get_cached_line_count();
} ## end sub consecutive_nonblank_lines
sub split_words {
# Given: a string containing words separated by whitespace,
# Return: the corresponding list of words
my ($str) = @_;
return unless defined($str);
$str =~ s/\s+$//;
$str =~ s/^\s+//;
return unless length($str);
return split /\s+/, $str;
} ## end sub split_words
sub K_next_code {
my ( $self, $KK, ($rLL) ) = @_;
# Given:
# $KK = index of a token in $rLL
# $rLL = optional token array to use (default is $self->[_rLL_])
# Return:
# The index of the next nonblank, non-comment token after $KK, or
# undef if none
return if ( !defined($KK) );
return if ( $KK < 0 );
# The optional third arg is useful when we are copying tokens from an old
# $rLL to a new $rLL array.
$rLL = $self->[_rLL_] if ( !defined($rLL) );
my $Num = @{$rLL};
while ( ++$KK < $Num ) {
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type ne 'b' && $type ne '#' ) {
return $KK;
}
} ## end while ( ++$KK < $Num )
return;
} ## end sub K_next_code
sub K_next_nonblank {
my ( $self, $KK, ($rLL) ) = @_;
# Given:
# $KK = index of a token in $rLL
# $rLL = optional token array to use (default is $self->[_rLL_])
# Return:
# The index of the next nonblank token after $KK, or
# undef if none
# NOTE: does not skip over the leading type 'q' of a hanging side comment
# (use K_next_code)
return if ( !defined($KK) );
return if ( $KK < 0 );
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] if ( !defined($rLL) );
# Normally, consecutive blanks do not occur. We could test for that
# here, but there are checks in the 'store_token' subs.
my $Num = @{$rLL};
while ( ++$KK < $Num ) {
if ( $rLL->[$KK]->[_TYPE_] ne 'b' ) { return $KK }
}
return;
} ## end sub K_next_nonblank
sub K_previous_code {
my ( $self, $KK, ($rLL) ) = @_;
# Given:
# $KK = index of a token in $rLL
# $rLL = optional token array to use (default is $self->[_rLL_])
# Return:
# The index of the previous nonblank, non-comment token after $KK, or
# undef if none
# Call with $KK=undef to start search at the top of the array
# The optional third arg is useful when we are copying tokens from an old
# $rLL to a new $rLL array.
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
if ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
if (DEVEL_MODE) {
Fault(<<EOM);
Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num
EOM
}
return;
}
while ( --$KK >= 0 ) {
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type ne 'b' && $type ne '#' ) { return $KK }
}
return;
} ## end sub K_previous_code
sub K_previous_nonblank {
my ( $self, $KK, ($rLL) ) = @_;
# Given:
# $KK = index of a token in $rLL
# $rLL = optional token array to use (default is $self->[_rLL_])
# Return:
# The index of the previous nonblank token after $KK, or
# undef if none
# Call with $KK=undef to start search at the top of the array
# NOTE: does not skip over the leading type 'q' of a hanging side comment
# (use K_previous_code)
# use the standard array unless given otherwise
$rLL = $self->[_rLL_] unless ( defined($rLL) );
my $Num = @{$rLL};
if ( !defined($KK) ) { $KK = $Num }
if ( $KK > $Num ) {
# This fault can be caused by a programming error in which a bad $KK is
# given. The caller should make the first call with KK_new=undef to
# avoid this error.
if (DEVEL_MODE) {
Fault(<<EOM);
Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num
EOM
}
return;
}
# Normally, consecutive blanks do not occur. We could test for that
# here, but there are checks in the 'store_token' subs.
while ( --$KK >= 0 ) {
if ( $rLL->[$KK]->[_TYPE_] ne 'b' ) { return $KK }
}
return;
} ## end sub K_previous_nonblank
sub K_first_code {
my ( $self, ($rLL) ) = @_;
# Given:
# $rLL = optional token array to override default
# Return:
# index $K of first non-blank, non-comment code token, or
# undef if none (no tokens in the file)
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return unless @{$rLL};
my $KK = 0;
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type ne 'b' && $type ne '#' ) { return $KK }
return $self->K_next_code($KK);
} ## end sub K_first_code
sub K_last_code {
my ( $self, ($rLL) ) = @_;
# Given:
# $rLL = optional token array to override default
# Return:
# index of last non-blank, non-comment code token, or
# undef if none (no tokens in the file)
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return unless @{$rLL};
my $KK = @{$rLL} - 1;
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type ne 'b' && $type ne '#' ) { return $KK }
return $self->K_previous_code($KK);
} ## end sub K_last_code
sub get_parent_containers {
my ( $self, $seqno ) = @_;
# Given:
# $seqno = sequence number of a container
# Return:
# ref to a list of parent container sequence numbers
my @list;
if ($seqno) {
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
my $seqno_last = $seqno;
while ( $seqno = $rparent_of_seqno->{$seqno} ) {
last if ( $seqno == SEQ_ROOT );
if ( $seqno >= $seqno_last ) {
## shouldn't happen - parent containers have lower seq numbers
DEVEL_MODE && Fault(<<EOM);
Error in 'rparent_of_seqno': expecting seqno=$seqno < last seqno=$seqno_last
EOM
last;
}
$seqno_last = $seqno;
push @list, $seqno;
} ## end while ( $seqno = $rparent_of_seqno...)
}
return \@list;
} ## end sub get_parent_containers
sub mark_parent_containers {
my ( $self, $seqno, $rhash, ($value) ) = @_;
# Task:
# set $rhash->{$seqno}=$value for all parent containers
# but not for $seqno itself
# Given:
# $seqno = sequence number of a container
# $rhash = ref to a hash with seqno as key
# $value = value for setting $rhash->{$seqno}=$value
# default = 1
return unless ($seqno);
if ( !defined($value) ) { $value = 1 }
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
my $seqno_last = $seqno;
while ( $seqno = $rparent_of_seqno->{$seqno} ) {
last if ( $seqno == SEQ_ROOT );
if ( $seqno >= $seqno_last ) {
## shouldn't happen - parent containers have lower sequence numbers
DEVEL_MODE && Fault(<<EOM);
Error in 'rparent_of_seqno': expecting seqno=$seqno < last seqno=$seqno_last
EOM
last;
}
$seqno_last = $seqno;
$rhash->{$seqno} = $value;
} ## end while ( $seqno = $rparent_of_seqno...)
return;
} ## end sub mark_parent_containers
sub copy_token_as_type {
# This provides a quick way to create a new token by
# slightly modifying an existing token.
my ( $rold_token, $type, $token ) = @_;
my @rnew_token = @{$rold_token};
$rnew_token[_TYPE_] = $type;
$rnew_token[_TOKEN_] = $token;
$rnew_token[_TYPE_SEQUENCE_] = EMPTY_STRING;
return \@rnew_token;
} ## end sub copy_token_as_type
sub parent_seqno_by_K {
# Return the sequence number of the parent container of token K, if any.
my ( $self, $KK ) = @_;
my $rLL = $self->[_rLL_];
# The task is to jump forward to the next container token
# and use the sequence number of either it or its parent.
# For example, consider the following with seqno=5 of the '[' and ']'
# being called with index K of the first token of each line:
# # result
# push @tests, # -
# [ # -
# sub { 99 }, 'do {&{%s} for 1,2}', # 5
# '(&{})(&{})', undef, # 5
# [ 2, 2, 0 ], 0 # 5
# ]; # -
# NOTE: The ending parent will be SEQ_ROOT for a balanced file. For
# unbalanced files, last sequence number will either be undefined or it may
# be at a deeper level. In either case we will just return SEQ_ROOT to
# have a defined value and allow formatting to proceed.
my $parent_seqno = SEQ_ROOT;
return $parent_seqno if ( !defined($KK) );
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($type_sequence) {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
else {
my $Kt = $self->[_rK_next_seqno_by_K_]->[$KK];
if ( defined($Kt) ) {
$type_sequence = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
my $type = $rLL->[$Kt]->[_TYPE_];
# if next container token is closing, it is the parent seqno
if ( $is_closing_type{$type} ) {
$parent_seqno = $type_sequence;
}
# otherwise we want its parent container
else {
$parent_seqno = $self->[_rparent_of_seqno_]->{$type_sequence};
}
}
}
$parent_seqno = SEQ_ROOT if ( !defined($parent_seqno) );
return $parent_seqno;
} ## end sub parent_seqno_by_K
sub parent_sub_seqno {
my ( $self, $seqno_paren ) = @_;
# Find sequence number of the named sub (not asub) which contains a given
# sequenced item
# Given:
# $seqno_paren = sequence number of a token within the sub
# Returns:
# $seqno of the sub, or
# nothing if no sub found
return unless defined($seqno_paren);
# Search upward
my $seqno = $seqno_paren;
my $seqno_last = $seqno_paren;
while ( $seqno = $self->[_rparent_of_seqno_]->{$seqno} ) {
last if ( $seqno == SEQ_ROOT );
if ( $self->[_ris_sub_block_]->{$seqno} ) {
return $seqno;
}
if ( $seqno >= $seqno_last ) {
## shouldn't happen - parent containers have lower sequence numbers
DEVEL_MODE && Fault(<<EOM);
Error in 'rparent_of_seqno': expecting seqno=$seqno < last seqno=$seqno_last
EOM
last;
}
$seqno_last = $seqno;
} ## end while ( $seqno = $self->[...])
return;
} ## end sub parent_sub_seqno
sub parent_sub_seqno_by_K {
my ( $self, $KK ) = @_;
#--------------------------------------------------------------------
# NOTE: not currently called but keep for possible future development
#--------------------------------------------------------------------
# Find sequence number of the named sub which contains a given token
# Given:
# $K = index K of a token
# Returns:
# $seqno of the sub, or
# nothing if no sub found
return unless defined($KK);
my $seqno_sub;
my $parent_seqno = $self->parent_seqno_by_K($KK);
if ( $self->[_ris_sub_block_]->{$parent_seqno} ) {
$seqno_sub = $parent_seqno;
}
else {
$seqno_sub = $self->parent_sub_seqno($parent_seqno);
}
return $seqno_sub;
} ## end sub parent_sub_seqno_by_K
sub is_in_block_by_i {
my ( $self, $i ) = @_;
# Return true if
# token at i is contained in a BLOCK
# or is at root level
# or there is some kind of error (i.e. unbalanced file)
# Return false otherwise
if ( $i < 0 ) {
DEVEL_MODE && Fault("Bad call, i='$i'\n");
return 1;
}
my $seqno = $parent_seqno_to_go[$i];
return 1 if ( !$seqno || $seqno == SEQ_ROOT );
return 1 if ( $self->[_rblock_type_of_seqno_]->{$seqno} );
return;
} ## end sub is_in_block_by_i
sub is_in_block_by_K {
my ( $self, $KK ) = @_;
# Return true if
# token at $KK is contained in a BLOCK
# or is at root level
# or there is some kind of error (i.e. unbalanced file)
# Return false otherwise
my $parent_seqno = $self->parent_seqno_by_K($KK);
return SEQ_ROOT if ( !$parent_seqno || $parent_seqno == SEQ_ROOT );
return $self->[_rblock_type_of_seqno_]->{$parent_seqno};
} ## end sub is_in_block_by_K
sub is_in_list_by_i {
my ( $self, $i ) = @_;
# Return true if token at i is contained in a LIST
# Return false otherwise
my $seqno = $parent_seqno_to_go[$i];
return if ( !$seqno );
return if ( $seqno == SEQ_ROOT );
if ( $self->[_ris_list_by_seqno_]->{$seqno} ) {
return 1;
}
return;
} ## end sub is_in_list_by_i
sub is_list_by_seqno {
# Return true if the immediate contents of a container appears to be a
# list.
my ( $self, $seqno ) = @_;
return unless defined($seqno);
return $self->[_ris_list_by_seqno_]->{$seqno};
} ## end sub is_list_by_seqno
sub is_interpolated_here_doc {
my ($token) = @_;
# Given:
# $token = the token text of a type 'h' token
# Return:
# true if the here doc is interpolated
# false if not
# Examples:
# <<EOM <-- interpolated
# <<"EOM" <-- interpolated
# <<'EOM' <-- not interpolated
return $token !~ /^ [^<]* << [~]? \' /x;
} ## end sub is_interpolated_here_doc
sub get_here_text {
my ( $self, $ix_HERE_BEG ) = @_;
# Collect the text of a here-doc
# Given:
# $ix_HERE_BEG = index of the line BEFORE the start of this here-doc
# Returns:
# $ix_HERE_END = line index of the last line of this here-doc
# $here_text = the here-doc text
# Example of $here_text with 2 lines:
# my $str=<<EOM; <--this line has index $ix_HERE_BEG
# here text line 1
# here text line 2
# EOM <--this line has index $ix_HERE_END
# If here-docs are stacked, then caller will use $ix_HERE_END as
# the beginning of the next here-doc.
my $rlines = $self->[_rlines_];
# Loop to collect the here doc text
my $ix_max = @{$rlines} - 1;
my $ix = $ix_HERE_BEG;
my $ix_HERE_END;
my $here_text = EMPTY_STRING;
while ( ++$ix <= $ix_max ) {
my $lhash = $rlines->[$ix];
my $ltype = $lhash->{_line_type};
if ( $ltype eq 'HERE' ) {
$here_text .= $lhash->{_line_text};
next;
}
elsif ( $ltype eq 'HERE_END' ) {
$ix_HERE_END = $ix;
last;
}
else {
DEVEL_MODE
&& Fault("line_type=$ltype should be HERE..\n");
$ix_HERE_END = $ix;
last;
}
} ## end while ( ++$ix <= $ix_max )
return ( $ix_HERE_END, $here_text );
} ## end sub get_here_text
sub is_trailing_comma {
my ( $self, $KK ) = @_;
# Given:
# $KK - index of a comma in token list
# Return:
# true if the comma at index $KK is a trailing comma
# false if not
my $rLL = $self->[_rLL_];
my $type_KK = $rLL->[$KK]->[_TYPE_];
if ( $type_KK ne ',' ) {
DEVEL_MODE
&& Fault("Bad call: expected type ',' but received '$type_KK'\n");
return;
}
my $Knnb = $self->K_next_nonblank($KK);
if ( defined($Knnb) ) {
my $type_sequence = $rLL->[$Knnb]->[_TYPE_SEQUENCE_];
my $type_Knnb = $rLL->[$Knnb]->[_TYPE_];
if ( $type_sequence && $is_closing_type{$type_Knnb} ) {
return 1;
}
}
return;
} ## end sub is_trailing_comma
sub cumulative_length_before_K {
my ( $self, $KK ) = @_;
# Returns the cumulative character length from the first token to
# token before the token at index $KK.
my $rLL = $self->[_rLL_];
return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
} ## end sub cumulative_length_before_K
# Number of leading characters to remove for quote types
# Zero values indicate types not used
my %Q_leading_chars = (
"'" => 1,
'"' => 1,
'/' => 1,
'm' => 2,
's' => 2,
'y' => 2,
'tr' => 3,
'qx' => 3,
'qr' => 3,
'qq' => 3,
'q' => 2,
);
# hash keys which are quotes may be one of these types
my %is_simple_quote_type = (
"'" => 1,
'"' => 1,
'qq' => 1,
'q' => 1,
);
sub Q_spy {
my ( $string, ($is_qwaf_Q) ) = @_;
# Look at the first few characters of a type Q token and identify
# its specific type, based on the above hash.
# Given:
# $string = A token type Q; if multiline, then the first token.
# $is_qwaf_Q = true if this is a special type Q within a qw list
# formatted with -qwaf. These do not have containing quote marks.
# Returns:
# - nothing if the type cannot be identified, or
# - hash with these values otherwise:
# nch = number of leading characters to remove to reveal the text
# is_simple = true if this quote is one of these types: qq q ' "
# is_interpolated = true if this quote type may contain code
# ch_key = first one or two characters indicating type
# i.e. one of the above hash keys.
# Note:
# - The number $nch is the minimum number; but it could be more
# if there are spaces before before the leading '(' or other delimiter,
# - This call works for multiline quotes provided that this sub is
# called with the first Q token in the string, not an intermediate one.
# - For efficiency, caller can handle common cases of leading ' or "
# - On return, caller should check the token type with $ch_key to decide
# how to parse further.
# - For the simple quote-type operators, the inner text can be found as:
# my $text = $is_qwaf_Q ? $string : substr( $string, $nch, -1 );
# where:
# $string = the concatenation of all type Q tokens, if multiline.
# Note that here we must check for the two char case first, then 1, because
# of ambiguity when $ch1='q'.
# Values for $is_qwaf_Q:
my $is_interpolated = 0;
my $is_simple = 1;
my $nch = 0;
my $ch_key = EMPTY_STRING;
# Note that type Q tokens in a qwaf call are not contained within quotes
if ( !$is_qwaf_Q ) {
my $ch1 = substr( $string, 0, 1 );
my $ch2 = substr( $string, 0, 2 );
$nch = $Q_leading_chars{$ch2};
$ch_key = $ch2;
if ( !defined($nch) ) {
$nch = $Q_leading_chars{$ch1};
$ch_key = $ch1;
}
return if ( !defined($nch) );
$is_simple = $is_simple_quote_type{$ch_key};
$is_interpolated = $ch1 ne 'q' && $ch1 ne "'";
}
return {
nch => $nch,
is_simple => $is_simple,
is_interpolated => $is_interpolated,
## TBD: Unique key not used yet, for future use:
## ch_key => $ch_key,
};
} ## end sub Q_spy
###########################################
# CODE SECTION 3: Check and process options
###########################################
sub check_options {
# This routine is called to check the user-supplied run parameters
# and to configure the control hashes to them.
( $rOpts, my $wvt_in_args, my $num_files, my $line_range_clipped ) = @_;
initialize_whitespace_hashes();
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
Exit(0);
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
Exit(0);
}
initialize_bond_strength_hashes();
# This function must be called early to get hashes with grep initialized
initialize_grep_and_friends();
# Make needed regex patterns for matching text.
# NOTE: sub_matching_patterns must be made first because later patterns use
# them; see RT #133130.
make_sub_matching_pattern(); # MUST BE FIRST pattern made
make_static_block_comment_pattern();
make_static_side_comment_pattern();
$format_skipping_pattern_begin =
make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
$format_skipping_pattern_end =
make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
make_non_indenting_brace_pattern();
initialize_closing_side_comments();
initialize_missing_else_comment();
initialize_warn_variable_types( $wvt_in_args, $num_files,
$line_range_clipped );
initialize_warn_mismatched();
make_bli_pattern();
make_bl_pattern();
make_block_brace_vertical_tightness_pattern();
make_blank_line_pattern();
make_keyword_group_list_pattern();
prepare_cuddled_block_types();
if ( $rOpts->{'dump-cuddled-block-list'} ) {
dump_cuddled_block_list(*STDOUT);
Exit(0);
}
# --indent-only skips the call to sub respace_tokens, which defines
# some essential data structures needed by some dump routines,
# or might be in the future. Since there is an immediate exit after a
# dump, we can turn off indent-only to get these structures for a -dump.
if ( $rOpts->{'indent-only'} ) {
if ( $rOpts->{'dump-mismatched-args'}
|| $rOpts->{'dump-mismatched-returns'} )
{
$rOpts->{'indent-only'} = 0;
}
if ( $rOpts->{'dump-block-summary'} ) {
$rOpts->{'indent-only'} = 0;
}
}
initialize_line_up_parentheses();
initialize_pack_operator_types();
check_tabs();
# We should put an upper bound on any -sil=n value. Otherwise enormous
# files could be created by mistake.
for ( $rOpts->{'starting-indentation-level'} ) {
if ( $_ && $_ > 100 ) {
Warn(<<EOM);
The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
EOM
$_ = 0;
}
}
# Require -msp > 0 to avoid future parsing problems (issue c147)
for ( $rOpts->{'minimum-space-to-comment'} ) {
if ( !$_ || $_ <= 0 ) { $_ = 1 }
}
initialize_outdent_keyword();
initialize_keyword_paren_inner_tightness();
initialize_space_after_keyword();
initialize_extended_block_tightness_list();
# The flag '$controlled_comma_style' will be set if the user
# entered any of -wbb=',' -wba=',' -kbb=',' -kba=','
# see sub 'initialize_token_break_preferences',
# and sub 'initialize_old_breakpoint_controls'
$controlled_comma_style = 0;
initialize_token_break_preferences();
initialize_old_breakpoint_controls();
initialize_container_indentation_options();
# make -l=0 equal to -l=infinite
if ( !$rOpts->{'maximum-line-length'} ) {
$rOpts->{'maximum-line-length'} = 1_000_000;
}
# make -lbl=0 equal to -lbl=infinite
if ( !$rOpts->{'long-block-line-count'} ) {
$rOpts->{'long-block-line-count'} = 1_000_000;
}
initialize_tightness_vars();
initialize_multiple_token_tightness();
initialize_global_option_vars();
initialize_line_length_vars(); # after 'initialize_global_option_vars'
initialize_trailing_comma_break_rules();
initialize_trailing_comma_rules(); # after 'initialize_line_length_vars'
# and '_trailing_comma_break_rules'
initialize_interbracket_arrow_style();
initialize_weld_nested_exclusion_rules();
initialize_weld_fat_comma_rules();
initialize_lpxl_lpil();
initialize_keep_old_blank_lines_hash();
return;
} ## end sub check_options
use constant ALIGN_GREP_ALIASES => 0;
sub initialize_grep_and_friends {
# Initialize or re-initialize hashes with 'grep' and grep aliases. This
# must be done after each set of options because new grep aliases may be
# used.
# re-initialize the hashes ... this is critical!
%is_sort_map_grep = ();
my @q = qw( sort map grep );
@is_sort_map_grep{@q} = (1) x scalar(@q);
my $olbxl = $rOpts->{'one-line-block-exclusion-list'};
my %is_olb_exclusion_word;
if ( defined($olbxl) ) {
my @list = split_words($olbxl);
if (@list) {
@is_olb_exclusion_word{@list} = (1) x scalar(@list);
}
}
# Make the list of block types which may be re-formed into one line.
# They will be modified with the grep-alias-list below and
# by sub 'prepare_cuddled_block_types'.
# Note that it is essential to always re-initialize the hash here:
%want_one_line_block = ();
if ( !$is_olb_exclusion_word{'*'} ) {
foreach (qw( sort map grep eval )) {
if ( !$is_olb_exclusion_word{$_} ) { $want_one_line_block{$_} = 1 }
}
}
# Note that any 'grep-alias-list' string has been preprocessed to be a
# trimmed, space-separated list.
my $str = $rOpts->{'grep-alias-list'};
my @grep_aliases = split /\s+/, $str;
if (@grep_aliases) {
@is_sort_map_grep{@grep_aliases} = (1) x scalar(@grep_aliases);
if ( $want_one_line_block{'grep'} ) {
@want_one_line_block{@grep_aliases} = (1) x scalar(@grep_aliases);
}
}
%is_sort_map_grep_eval = %is_sort_map_grep;
$is_sort_map_grep_eval{'eval'} = 1;
%is_sort_map_grep_eval_do = %is_sort_map_grep_eval;
$is_sort_map_grep_eval_do{'do'} = 1;
# These block types can take ci. This is used by the -xci option.
# Note that the 'sub' in this list is an anonymous sub. To be more correct
# we could remove sub and use ASUB pattern to also handle a
# prototype/signature. But that would slow things down and would probably
# never be useful.
%is_block_with_ci = %is_sort_map_grep_eval_do;
$is_block_with_ci{'sub'} = 1;
@q = qw( grep keys map reverse sort split );
push @q, @grep_aliases;
%is_keyword_returning_list = ();
@is_keyword_returning_list{@q} = (1) x scalar(@q);
# This code enables vertical alignment of grep aliases for testing. It has
# not been found to be beneficial, so it is off by default. But it is
# useful for precise testing of the grep alias coding.
if (ALIGN_GREP_ALIASES) {
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
foreach (@q) {
$block_type_map{$_} = 'map' unless ( $_ eq 'map' );
}
}
return;
} ## end sub initialize_grep_and_friends
sub initialize_weld_nested_exclusion_rules {
%weld_nested_exclusion_rules = ();
my $opt_name = 'weld-nested-exclusion-list';
my $str = $rOpts->{$opt_name};
# let a '0' be the same as not defined
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return unless ($str);
# There are four container tokens.
my %token_keys = (
'(' => '(',
'[' => '[',
'{' => '{',
'q' => 'q',
);
# We are parsing an exclusion list for nested welds. The list is a string
# with spaces separating any number of items. Each item consists of three
# pieces of information:
# <optional position> <optional type> <type of container>
# < ^ or . > < k or K > < ( [ { >
# The last character is the required container type and must be one of:
# ( = paren
# [ = square bracket
# { = brace
# An optional leading position indicator:
# ^ means the leading token position in the weld
# . means a secondary token position in the weld
# no position indicator means all positions match
# An optional alphanumeric character between the position and container
# token selects to which the rule applies:
# k = any keyword
# K = any non-keyword
# f = function call
# F = not a function call
# w = function or keyword
# W = not a function or keyword
# no letter means any preceding type matches
# Examples:
# ^( - the weld must not start with a paren
# .( - the second and later tokens may not be parens
# ( - no parens in weld
# ^K( - exclude a leading paren not preceded by a keyword
# .k( - exclude a secondary paren preceded by a keyword
# [ { - exclude all brackets and braces
my @items = split /\s+/, $str;
my $msg1;
my $msg2;
foreach my $item (@items) {
my $item_save = $item;
my $tok = chop $item;
my $key = $token_keys{$tok};
if ( !defined($key) ) {
$msg1 .= " '$item_save'";
next;
}
if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
$weld_nested_exclusion_rules{$key} = [];
}
my $rflags = $weld_nested_exclusion_rules{$key};
# A 'q' means do not weld quotes
if ( $tok eq 'q' ) {
$rflags->[0] = '*';
$rflags->[1] = '*';
next;
}
my $pos = '*';
my $select = '*';
if ($item) {
if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
$pos = $1 if ($1);
$select = $2 if ($2);
}
else {
$msg1 .= " '$item_save'";
next;
}
}
my $err;
if ( $pos eq '^' || $pos eq '*' ) {
if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
$err = 1;
}
$rflags->[0] = $select;
}
if ( $pos eq '.' || $pos eq '*' ) {
if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
$err = 1;
}
$rflags->[1] = $select;
}
if ($err) { $msg2 .= " '$item_save'"; }
}
if ($msg1) {
Warn(<<EOM);
Unexpecting symbol(s) encountered in --$opt_name will be ignored:
$msg1
EOM
}
if ($msg2) {
Warn(<<EOM);
Multiple specifications were encountered in the --weld-nested-exclusion-list for:
$msg2
Only the last will be used.
EOM
}
return;
} ## end sub initialize_weld_nested_exclusion_rules
sub initialize_weld_fat_comma_rules {
# Initialize a hash controlling which opening token types can be
# welded around a fat comma
%weld_fat_comma_rules = ();
# The -wfc flag turns on welding of '=>' after an opening paren
if ( $rOpts->{'weld-fat-comma'} ) { $weld_fat_comma_rules{'('} = 1 }
# This could be generalized in the future by introducing a parameter
# -weld-fat-comma-after=str (-wfca=str), where str contains any of:
# * { [ (
# to indicate which opening parens may weld to a subsequent '=>'
# The flag -wfc would then be equivalent to -wfca='('
# This has not been done because it is not yet clear how useful
# this generalization would be.
return;
} ## end sub initialize_weld_fat_comma_rules
sub initialize_lpxl_lpil {
%line_up_parentheses_control_hash = ();
$line_up_parentheses_control_is_lpxl = 1;
my $lpxl = $rOpts->{'line-up-parentheses-exclusion-list'};
my $lpil = $rOpts->{'line-up-parentheses-inclusion-list'};
if ( $lpxl && $lpil ) {
Warn(<<EOM);
You entered values for both -lpxl=s and -lpil=s; the -lpil list will be ignored
EOM
}
if ($lpxl) {
$line_up_parentheses_control_is_lpxl = 1;
initialize_line_up_parentheses_control_hash(
$rOpts->{'line-up-parentheses-exclusion-list'}, 'lpxl' );
}
elsif ($lpil) {
$line_up_parentheses_control_is_lpxl = 0;
initialize_line_up_parentheses_control_hash(
$rOpts->{'line-up-parentheses-inclusion-list'}, 'lpil' );
}
else {
# neither -lpxl nor -lpil specified
}
return;
} ## end sub initialize_lpxl_lpil
sub initialize_line_up_parentheses_control_hash {
my ( $str, $opt_name ) = @_;
# let a 0 be the same as not defined
return unless ($str);
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return unless ($str);
# The format is space separated items, where each item must consist of a
# string with a token type preceded by an optional text token and followed
# by an integer:
# For example:
# W(1
# = (flag1)(key)(flag2), where
# flag1 = 'W'
# key = '('
# flag2 = '1'
my @items = split /\s+/, $str;
my $msg1;
my $msg2;
foreach my $item (@items) {
my $item_save = $item;
my ( $flag1, $key, $flag2 );
if ( $item =~ /^ ([^\(\[\{]*)? ([\(\{\[]) (\d)? $/x ) {
## $flag1 $key $flag2
$flag1 = $1 if $1;
$key = $2 if $2;
$flag2 = $3 if defined($3);
}
else {
$msg1 .= " '$item_save'";
next;
}
if ( !defined($key) ) {
$msg1 .= " '$item_save'";
next;
}
# Check for valid flag1
if ( !defined($flag1) ) { $flag1 = '*' }
if ( $flag1 !~ /^[kKfFwW\*]$/ ) {
$msg1 .= " '$item_save'";
next;
}
# Check for valid flag2
# 0 or blank: ignore container contents
# 1 all containers with sublists match
# 2 all containers with sublists, code blocks or ternary operators match
# ... this could be extended in the future
if ( !defined($flag2) ) { $flag2 = 0 }
if ( $flag2 !~ /^[012]$/ ) {
$msg1 .= " '$item_save'";
next;
}
if ( !defined( $line_up_parentheses_control_hash{$key} ) ) {
$line_up_parentheses_control_hash{$key} = [ $flag1, $flag2 ];
next;
}
# check for multiple conflicting specifications
my $rflags = $line_up_parentheses_control_hash{$key};
my $err;
if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
$err = 1;
$rflags->[0] = $flag1;
}
if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
$err = 1;
$rflags->[1] = $flag2;
}
$msg2 .= " '$item_save'" if ($err);
next;
}
if ($msg1) {
Warn(<<EOM);
Unexpecting symbol(s) encountered in --$opt_name will be ignored:
$msg1
EOM
}
if ($msg2) {
Warn(<<EOM);
Multiple specifications were encountered in the $opt_name at:
$msg2
Only the last will be used.
EOM
}
# Speedup: we can turn off -lp if it is not actually used
if ($line_up_parentheses_control_is_lpxl) {
my $all_off = 1;
foreach my $key (qw# ( { [ #) {
my $rflags = $line_up_parentheses_control_hash{$key};
if ( defined($rflags) ) {
my ( $flag1, $flag2 ) = @{$rflags};
if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
if ($flag2) { $all_off = 0; last }
}
}
if ($all_off) {
$rOpts->{'line-up-parentheses'} = EMPTY_STRING;
}
}
return;
} ## end sub initialize_line_up_parentheses_control_hash
sub initialize_space_after_keyword {
# Default keywords for which space is introduced before an opening paren:
# (at present, including them messes up vertical alignment)
my @sak = qw( my local our state and or xor err eq ne if else elsif until
unless while for foreach return switch case given when catch );
%space_after_keyword = map { $_ => 1 } @sak;
# first remove any or all of these if desired
if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
# -nsak='*' selects all the above keywords
if ( @q == 1 && $q[0] eq '*' ) { @q = keys %space_after_keyword }
@space_after_keyword{@q} = (0) x scalar(@q);
}
# then allow user to add to these defaults
if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
@space_after_keyword{@q} = (1) x scalar(@q);
}
return;
} ## end sub initialize_space_after_keyword
sub initialize_outdent_keyword {
# Implement outdenting preferences for keywords
%outdent_keyword = ();
my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
if ( !@okw ) {
@okw = qw( next last redo goto return ); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
foreach (@okw) {
if ( Perl::Tidy::Tokenizer::is_keyword($_) ) {
$outdent_keyword{$_} = 1;
}
else {
Warn("ignoring '$_' in -okwl list; not a perl keyword");
}
}
return;
} ## end sub initialize_outdent_keyword
sub initialize_keyword_paren_inner_tightness {
# Setup hash for -kpit option
%keyword_paren_inner_tightness = ();
my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
if ( defined($kpit_value) && $kpit_value != 1 ) {
my @kpit =
split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
if ( !@kpit ) {
@kpit = qw( if elsif unless while until for foreach ); # defaults
}
# we will allow keywords and user-defined identifiers
foreach (@kpit) {
$keyword_paren_inner_tightness{$_} = $kpit_value;
}
}
return;
} ## end sub initialize_keyword_paren_inner_tightness
sub initialize_extended_block_tightness_list {
# Setup the control hash for --extended-block-tightness
# keywords taking indirect objects:
my @k_list = keys %is_indirect_object_taker;
# type symbols which may precede an opening block brace
my @t_list = qw( $ @ % & * );
push @t_list, '$#';
my @all = ( @k_list, @t_list );
# We will build the selection in %hash
# By default the option is 'on' for keywords only (-xbtl='k')
my %hash;
@hash{@k_list} = (1) x scalar(@k_list);
@hash{@t_list} = (0) x scalar(@t_list);
# This can be overridden with -xbtl="..."
my $long_name = 'extended-block-tightness-list';
if ( $rOpts->{$long_name} ) {
my @words = split_words( $rOpts->{$long_name} );
my @unknown;
# Turn everything off
@hash{@all} = (0) x scalar(@all);
# Then turn on selections
foreach my $word (@words) {
# 'print' etc turns on a specific word or symbol
if ( defined( $hash{$word} ) ) { $hash{$word} = 1; }
# 'k' turns on all keywords
elsif ( $word eq 'k' ) {
@hash{@k_list} = (1) x scalar(@k_list);
}
# 't' turns on all symbols
elsif ( $word eq 't' ) {
@hash{@t_list} = (1) x scalar(@t_list);
}
# 'kt' same as 'k' and 't' for convenience
elsif ( $word eq 'kt' ) {
@hash{@all} = (1) x scalar(@all);
}
# Anything else is an error
else { push @unknown, $word }
}
if (@unknown) {
my $num = @unknown;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized keyword(s) were input with --$long_name :
@unknown
EOM
}
}
# Transfer the result to the global hash
%extended_block_tightness_list = %hash;
return;
} ## end sub initialize_extended_block_tightness_list
sub initialize_token_break_preferences {
# Initialize these global hashes defining break preferences:
# %want_break_before
# %break_before_container_types
my $break_after = sub {
my @toks = @_;
foreach my $tok (@toks) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
if ( $tok eq ',' ) { $controlled_comma_style = 1 }
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
return;
}; ## end $break_after = sub
my $break_before = sub {
my @toks = @_;
foreach my $tok (@toks) {
if ( $tok eq ',' ) { $controlled_comma_style = 1 }
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
return;
}; ## end $break_before = sub
$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
$break_before->(@all_operators)
if ( $rOpts->{'break-before-all-operators'} );
$break_after->( split_words( $rOpts->{'want-break-after'} ) );
$break_before->( split_words( $rOpts->{'want-break-before'} ) );
# Make note if breaks are before certain key types
# Added '->' for git #171.
%want_break_before = ();
foreach my $tok ( @all_operators, ',', '->' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
# Coordinate ?/: breaks, which must be similar
# The small strength 0.01 which is added is 1% of the strength of one
# indentation level and seems to work okay.
if ( !$want_break_before{':'} ) {
$want_break_before{'?'} = $want_break_before{':'};
$right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
$left_bond_strength{'?'} = NO_BREAK;
}
# Only make a hash entry for the next parameters if values are defined.
# That allows a quick check to be made later.
%break_before_container_types = ();
for ( $rOpts->{'break-before-hash-brace'} ) {
$break_before_container_types{'{'} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-square-bracket'} ) {
$break_before_container_types{'['} = $_ if $_ && $_ > 0;
}
for ( $rOpts->{'break-before-paren'} ) {
$break_before_container_types{'('} = $_ if $_ && $_ > 0;
}
# Note: a fix for b1266 previously here is now covered by the
# updates for b1470, b1474, so it has been removed.
return;
} ## end sub initialize_token_break_preferences
sub initialize_line_up_parentheses {
# -xlp implies -lp
if ( $rOpts->{'extended-line-up-parentheses'} ) {
$rOpts->{'line-up-parentheses'} ||= 1;
}
if ( $rOpts->{'line-up-parentheses'} ) {
if ( $rOpts->{'indent-only'}
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
Warn(<<EOM);
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints. This isn't possible
with these flags.
-----------------------------------------------------------------------
EOM
$rOpts->{'line-up-parentheses'} = 0;
$rOpts->{'extended-line-up-parentheses'} = 0;
}
if ( $rOpts->{'whitespace-cycle'} ) {
Warn(<<EOM);
Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
EOM
$rOpts->{'whitespace-cycle'} = 0;
}
}
#-----------------------------------------------------------
# The combination -lp -vmll can be unstable if -ci<2 (b1267)
#-----------------------------------------------------------
# The -vmll and -lp parameters do not really work well together.
# This is a very crude fix for an unusual parameter combination.
if ( $rOpts->{'variable-maximum-line-length'}
&& $rOpts->{'line-up-parentheses'}
&& $rOpts->{'continuation-indentation'} < 2 )
{
$rOpts->{'continuation-indentation'} = 2;
##Warn("Increased -ci=n to n=2 for stability with -lp and -vmll\n");
}
#-----------------------------------------------------------
# The combination -lp -vmll -atc -dtc can be unstable
#-----------------------------------------------------------
# This fixes b1386 b1387 b1388 which had -wtc='b'
# Updated to to include any -wtc to fix b1426
if ( $rOpts->{'variable-maximum-line-length'}
&& $rOpts->{'line-up-parentheses'}
&& $rOpts->{'add-trailing-commas'}
&& $rOpts->{'delete-trailing-commas'}
&& $rOpts->{'want-trailing-commas'} )
{
$rOpts->{'delete-trailing-commas'} = 0;
## Issuing a warning message causes trouble with test cases, and this combo is
## so rare that it is unlikely to not occur in practice. So skip warning.
## Warn(
##"The combination -vmll -lp -atc -dtc can be unstable; turning off -dtc\n"
## );
}
#-----------------------------------------------------------
# The combination -xlp -xci and ci>i can be unstable (b1466)
#-----------------------------------------------------------
# Deactivated: the fix for b1501 also fixed b1466 in a simpler way.
# So this block can eventually be removed.
if ( 0
&& $rOpts->{'extended-line-up-parentheses'}
&& $rOpts->{'extended-continuation-indentation'}
&& $rOpts->{'continuation-indentation'} > $rOpts->{'indent-columns'}
&& $rOpts->{'indent-columns'} > 1 )
{
$rOpts->{'continuation-indentation'} = $rOpts->{'indent-columns'};
## This combination is only likely to occur during random testing, so
## skip the warning.
##Warn("The combination -xlp -xci -ci>-i can be unstable; reducing ci\n");
}
return;
} ## end sub initialize_line_up_parentheses
sub initialize_pack_operator_types {
# Setup the control hash for --pack-operator-types
%pack_operator_types = ();
# This option is currently only implemented for '->' and '.' chains.
# The possibility exists to extend this to other chain operators
# in the future, but some programming and a lot of testing are required.
##my @ok = qw( -> . && || and or : ? + - * / );
my @ok = qw( -> . );
my %is_ok;
@is_ok{@ok} = (1) x scalar(@ok);
my $long_name = 'pack-operator-types';
my %hash;
my @unknown;
if ( $rOpts->{$long_name} ) {
my @words = split_words( $rOpts->{$long_name} );
foreach my $word (@words) {
if ( $word eq '?' ) { $word = ':' }
if ( $word eq '/' ) { $word = '*' }
if ( $word eq '-' ) { $word = '+' }
if ( $is_ok{$word} ) { $hash{$word} = 1 }
else { push @unknown, $word }
}
if (@unknown) {
my $num = @unknown;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized types(s) were input with --$long_name :
@unknown
EOM
}
}
# Transfer the result to the global hash
%pack_operator_types = %hash;
return;
} ## end sub initialize_pack_operator_types
sub check_tabs {
# At present, tabs are not compatible with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
# tabs are not compatible with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
Warn(<<EOM);
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
return;
} ## end sub check_tabs
sub initialize_container_indentation_options {
%container_indentation_options = ();
foreach my $pair (
[ 'break-before-hash-brace-and-indent', '{' ],
[ 'break-before-square-bracket-and-indent', '[' ],
[ 'break-before-paren-and-indent', '(' ],
)
{
my ( $key, $tok ) = @{$pair};
my $opt = $rOpts->{$key};
if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
{
# (1) -lp is not compatible with opt=2, silently set to opt=0
# (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
# (3) set opt=0 if -i < -ci (can be unstable, case b1355)
if ( $opt == 2 ) {
if (
$rOpts->{'line-up-parentheses'}
|| ( $rOpts->{'indent-columns'} <=
$rOpts->{'continuation-indentation'} )
)
{
$opt = 0;
}
}
$container_indentation_options{$tok} = $opt;
}
}
return;
} ## end sub initialize_container_indentation_options
sub initialize_old_breakpoint_controls {
if ( $rOpts->{'ignore-old-breakpoints'} ) {
my @conflicts;
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
$rOpts->{'break-at-old-method-breakpoints'} = 0;
push @conflicts, '--break-at-old-method-breakpoints (-bom)';
}
if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
$rOpts->{'break-at-old-comma-breakpoints'} = 0;
push @conflicts, '--break-at-old-comma-breakpoints (-boc)';
}
if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
$rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
push @conflicts, '--break-at-old-semicolon-breakpoints (-bos)';
}
if ( $rOpts->{'keep-old-breakpoints-before'} ) {
$rOpts->{'keep-old-breakpoints-before'} = EMPTY_STRING;
push @conflicts, '--keep-old-breakpoints-before (-kbb)';
}
if ( $rOpts->{'keep-old-breakpoints-after'} ) {
$rOpts->{'keep-old-breakpoints-after'} = EMPTY_STRING;
push @conflicts, '--keep-old-breakpoints-after (-kba)';
}
if (@conflicts) {
my $msg = join( "\n ",
" Conflict: These conflicts with --ignore-old-breakponts (-iob) will be turned off:",
@conflicts ) . "\n";
Warn($msg);
}
# Note: These additional parameters are made inactive by -iob.
# They are silently turned off here because they are on by default.
# We would generate unexpected warnings if we issued a warning.
$rOpts->{'break-at-old-keyword-breakpoints'} = 0;
$rOpts->{'break-at-old-logical-breakpoints'} = 0;
$rOpts->{'break-at-old-ternary-breakpoints'} = 0;
$rOpts->{'break-at-old-attribute-breakpoints'} = 0;
}
%keep_break_before_type = ();
initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-before'},
'kbb', \%keep_break_before_type );
%keep_break_after_type = ();
initialize_keep_old_breakpoints( $rOpts->{'keep-old-breakpoints-after'},
'kba', \%keep_break_after_type );
# Modify %keep_break_before and %keep_break_after to avoid conflicts
# with %want_break_before; fixes b1436.
# This became necessary after breaks for some tokens were converted
# from hard to soft (see b1433).
# We could do this for all tokens, but to minimize changes to existing
# code we currently only do this for the soft break tokens.
foreach my $key ( keys %keep_break_before_type ) {
if ( defined( $want_break_before{$key} )
&& !$want_break_before{$key}
&& $is_soft_keep_break_type{$key} )
{
$keep_break_after_type{$key} = $keep_break_before_type{$key};
delete $keep_break_before_type{$key};
}
}
foreach my $key ( keys %keep_break_after_type ) {
if ( defined( $want_break_before{$key} )
&& $want_break_before{$key}
&& $is_soft_keep_break_type{$key} )
{
$keep_break_before_type{$key} = $keep_break_after_type{$key};
delete $keep_break_after_type{$key};
}
}
$controlled_comma_style ||= $keep_break_before_type{','};
$controlled_comma_style ||= $keep_break_after_type{','};
return;
} ## end sub initialize_old_breakpoint_controls
use constant DEBUG_KB => 0;
sub initialize_keep_old_breakpoints {
my ( $str, $short_name, $rkeep_break_hash ) = @_;
# 0 will be treated same as not defined
return unless $str;
my %flags = ();
my @list = split_words($str);
if ( DEBUG_KB && @list ) {
local $LIST_SEPARATOR = SPACE;
print <<EOM;
DEBUG_KB entering for '$short_name' with str=$str\n";
list is: @list;
EOM
}
# Ignore kbb='(' and '[' and '{': can cause unstable math formatting
# (issues b1346, b1347, b1348) and likewise ignore kba=')' and ']' and '}'
# Also always ignore ? and : (b1440 and b1433-b1439)
if ( $short_name eq 'kbb' ) {
@list = grep { !m/[\(\[\{\?\:]/ } @list;
}
elsif ( $short_name eq 'kba' ) {
@list = grep { !m/[\)\]\}\?\:]/ } @list;
}
else {
Fault(<<EOM);
Bad call arg - received short name '$short_name' but expecting 'kbb' or 'kba'
EOM
}
# pull out any any leading container code, like f( or *{
# For example: 'f(' becomes flags hash entry '(' => 'f'
foreach my $item (@list) {
if ( $item =~ /^( [ \w\* ] )( [ \{\(\[\}\)\] ] )$/x ) {
$item = $2;
$flags{$2} = $1;
}
}
my @unknown_types;
foreach my $type (@list) {
if ( !Perl::Tidy::Tokenizer::is_valid_token_type($type) ) {
push @unknown_types, $type;
}
}
if (@unknown_types) {
my $num = @unknown_types;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized token types were input with --$short_name :
@unknown_types
EOM
}
@{$rkeep_break_hash}{@list} = (1) x scalar(@list);
foreach my $key ( keys %flags ) {
my $flag = $flags{$key};
if ( length($flag) != 1 ) {
Warn(<<EOM);
Multiple entries given for '$key' in '$short_name'
EOM
}
elsif ( ( $key eq '(' || $key eq ')' ) && $flag !~ /^[kKfFwW\*]$/ ) {
Warn(<<EOM);
Unknown flag '$flag' given for '$key' in '$short_name'
EOM
}
elsif ( ( $key eq '}' || $key eq '}' ) && $flag !~ /^[bB\*]$/ ) {
Warn(<<EOM);
Unknown flag '$flag' given for '$key' in '$short_name'
EOM
}
else {
# no error seen
}
$rkeep_break_hash->{$key} = $flag;
}
if ( DEBUG_KB && @list ) {
my @tmp = %flags;
local $LIST_SEPARATOR = SPACE;
print <<EOM;
DEBUG_KB -$short_name flag: $str
final keys: @list
special flags: @tmp
EOM
}
return;
} ## end sub initialize_keep_old_breakpoints
sub initialize_tightness_vars {
# hashes used to simplify setting whitespace
%tightness = (
'{' => $rOpts->{'brace-tightness'},
'}' => $rOpts->{'brace-tightness'},
'(' => $rOpts->{'paren-tightness'},
')' => $rOpts->{'paren-tightness'},
'[' => $rOpts->{'square-bracket-tightness'},
']' => $rOpts->{'square-bracket-tightness'},
);
return;
} ## end sub initialize_tightness_vars
sub initialize_multiple_token_tightness {
# Initialization for --multiple-token-tightness
%multiple_token_tightness = ();
my $opt_name = 'multiple-token-tightness';
my $opt = $rOpts->{$opt_name};
# The default is to add spaces for the double diamond
if ( !$opt ) {
$multiple_token_tightness{'<<>>'} = 1;
return;
}
# These are valid input words for perltidy token types
# Note that 'qw' will be translated into the actual token type 'q'
my %is_type_option;
my @type_options = qw( <<>> qw Q h );
@is_type_option{@type_options} = (1) x scalar(@type_options);
# These are valid input words subtypes of token type 'Q'.
# Note qw must be treated specially and is in the previous list.
my %is_Q_subtype_option;
my @Q_subtype_options = qw( q qq qx qr s y tr m );
@is_Q_subtype_option{@Q_subtype_options} =
(1) x scalar(@Q_subtype_options);
my %is_valid_term = ( %is_type_option, %is_Q_subtype_option );
# Words can be negated by prefixing with the following character:
my $neg_char = '^';
# Scan the input
my %positive_input;
my %negative_input;
my $error_string = EMPTY_STRING;
if ( defined($opt) ) {
my @list = split_words($opt);
foreach my $word (@list) {
# The special word 'q*' means all of the Q_subtypes plus 'qw'
if ( $word eq 'q*' ) {
foreach (@Q_subtype_options) { $positive_input{$_} = 1 }
$positive_input{'qw'} = 1;
}
elsif ( $word eq $neg_char . 'q*' ) {
foreach (@Q_subtype_options) { $negative_input{$_} = 1 }
$negative_input{'qw'} = 1;
}
elsif ( $is_valid_term{$word} ) {
$positive_input{$word} = 1;
}
elsif ( substr( $word, 0, 1 ) eq $neg_char
&& $is_valid_term{ substr( $word, 1 ) } )
{
$negative_input{ substr( $word, 1 ) } = 1;
}
else {
$error_string .= "$word ";
}
}
}
if ($error_string) {
$error_string =~ s/\s+$//;
Warn(<<EOM);
Ignoring these unknown terms in --$opt_name: '$error_string'
EOM
}
# The token '<<>>' is always a default unless rejected
if ( !$negative_input{'<<>>'} ) {
$positive_input{'<<>>'} = 1;
}
# Now construct the control hash
my @Q_subtype_list;
foreach my $word ( keys %positive_input ) {
# negative has priority over positive
next if ( $negative_input{$word} );
if ( $is_type_option{$word} ) {
if ( $word eq 'qw' ) { $word = 'q' }
$multiple_token_tightness{$word} = 1;
}
elsif ( $is_Q_subtype_option{$word} ) {
push @Q_subtype_list, $word;
}
else {
# something is wrong; previous checks should prevent arriving here
DEVEL_MODE
&& Fault(
"unexpected word '$word' while initializing -mutt=$opt\n");
%multiple_token_tightness = ();
return;
}
}
# Construct a regex for the selected Q subtypes, in the form
# ^(?:qq|qx|qr|q|s|y|tr|m)\b
if (@Q_subtype_list) {
my $regex = q{^(?:} . join( '|', @Q_subtype_list ) . q{)\b};
if ( bad_pattern($regex) ) {
# shouldn't happen; there must be a coding error
my $msg =
"ERROR: the --$opt_name input caused an invalid regex '$regex'\n";
DEVEL_MODE && Fault($msg);
Warn($msg);
%multiple_token_tightness = ();
return;
}
$multiple_token_tightness{'Q'} = $regex;
}
return;
} ## end sub initialize_multiple_token_tightness
sub initialize_global_option_vars {
#------------------------------------------------------------
# Make global vars for frequently used options for efficiency
#------------------------------------------------------------
$rOpts_add_newlines = $rOpts->{'add-newlines'};
$rOpts_add_trailing_commas = $rOpts->{'add-trailing-commas'};
$rOpts_add_lone_trailing_commas = $rOpts->{'add-lone-trailing-commas'};
$rOpts_add_whitespace = $rOpts->{'add-whitespace'};
$rOpts_blank_lines_after_opening_block =
$rOpts->{'blank-lines-after-opening-block'};
$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
$rOpts_brace_follower_vertical_tightness =
$rOpts->{'brace-follower-vertical-tightness'};
$rOpts_break_after_labels = $rOpts->{'break-after-labels'};
$rOpts_break_at_old_attribute_breakpoints =
$rOpts->{'break-at-old-attribute-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_break_at_old_keyword_breakpoints =
$rOpts->{'break-at-old-keyword-breakpoints'};
$rOpts_break_at_old_logical_breakpoints =
$rOpts->{'break-at-old-logical-breakpoints'};
$rOpts_break_at_old_semicolon_breakpoints =
$rOpts->{'break-at-old-semicolon-breakpoints'};
$rOpts_break_at_old_ternary_breakpoints =
$rOpts->{'break-at-old-ternary-breakpoints'};
$rOpts_break_open_compact_parens = $rOpts->{'break-open-compact-parens'};
$rOpts_closing_side_comments = $rOpts->{'closing-side-comments'};
$rOpts_closing_side_comment_else_flag =
$rOpts->{'closing-side-comment-else-flag'};
$rOpts_closing_side_comment_maximum_text =
$rOpts->{'closing-side-comment-maximum-text'};
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
$rOpts_cuddled_paren_brace = $rOpts->{'cuddled-paren-brace'};
$rOpts_delete_closing_side_comments =
$rOpts->{'delete-closing-side-comments'};
$rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
$rOpts_extended_continuation_indentation =
$rOpts->{'extended-continuation-indentation'};
$rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
$rOpts_delete_trailing_commas = $rOpts->{'delete-trailing-commas'};
$rOpts_delete_lone_trailing_commas =
$rOpts->{'delete-lone-trailing-commas'};
$rOpts_delete_weld_interfering_commas =
$rOpts->{'delete-weld-interfering-commas'};
$rOpts_format_skipping = $rOpts->{'format-skipping'};
$rOpts_freeze_whitespace = $rOpts->{'freeze-whitespace'};
$rOpts_function_paren_vertical_alignment =
$rOpts->{'function-paren-vertical-alignment'};
$rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
$rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
$rOpts_ignore_side_comment_lengths =
$rOpts->{'ignore-side-comment-lengths'};
$rOpts_ignore_perlcritic_comments = $rOpts->{'ignore-perlcritic-comments'};
$rOpts_indent_closing_brace = $rOpts->{'indent-closing-brace'};
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_indent_leading_semicolon = $rOpts->{'indent-leading-semicolon'};
$rOpts_indent_only = $rOpts->{'indent-only'};
$rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_extended_block_tightness = $rOpts->{'extended-block-tightness'};
$rOpts_extended_line_up_parentheses =
$rOpts->{'extended-line-up-parentheses'};
$rOpts_logical_padding = $rOpts->{'logical-padding'};
$rOpts_maximum_consecutive_blank_lines =
$rOpts->{'maximum-consecutive-blank-lines'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_minimize_continuation_indentation =
$rOpts->{'minimize-continuation-indentation'};
$rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
$rOpts_opening_brace_always_on_right =
$rOpts->{'opening-brace-always-on-right'};
$rOpts_outdent_keywords = $rOpts->{'outdent-keywords'};
$rOpts_outdent_labels = $rOpts->{'outdent-labels'};
$rOpts_outdent_long_comments = $rOpts->{'outdent-long-comments'};
$rOpts_outdent_long_quotes = $rOpts->{'outdent-long-quotes'};
$rOpts_outdent_static_block_comments =
$rOpts->{'outdent-static-block-comments'};
$rOpts_recombine = $rOpts->{'recombine'};
$rOpts_qw_as_function = $rOpts->{'qw-as-function'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
$rOpts_space_prototype_paren = $rOpts->{'space-prototype-paren'};
$rOpts_space_signature_paren = $rOpts->{'space-signature-paren'};
$rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
$rOpts_static_block_comments = $rOpts->{'static-block-comments'};
$rOpts_add_missing_else = $rOpts->{'add-missing-else'};
$rOpts_warn_missing_else = $rOpts->{'warn-missing-else'};
$rOpts_tee_block_comments = $rOpts->{'tee-block-comments'};
$rOpts_tee_pod = $rOpts->{'tee-pod'};
$rOpts_tee_side_comments = $rOpts->{'tee-side-comments'};
$rOpts_valign_code = $rOpts->{'valign-code'};
$rOpts_valign_side_comments = $rOpts->{'valign-side-comments'};
$rOpts_valign_if_unless = $rOpts->{'valign-if-unless'};
$rOpts_valign_wide_equals = $rOpts->{'valign-wide-equals'};
$rOpts_variable_maximum_line_length =
$rOpts->{'variable-maximum-line-length'};
$rOpts_warn_unique_keys_cutoff = $rOpts->{'warn-unique-keys-cutoff'};
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
%opening_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness'},
'{' => $rOpts->{'brace-vertical-tightness'},
'[' => $rOpts->{'square-bracket-vertical-tightness'},
')' => $rOpts->{'paren-vertical-tightness'},
'}' => $rOpts->{'brace-vertical-tightness'},
']' => $rOpts->{'square-bracket-vertical-tightness'},
);
%closing_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness-closing'},
'{' => $rOpts->{'brace-vertical-tightness-closing'},
'[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
')' => $rOpts->{'paren-vertical-tightness-closing'},
'}' => $rOpts->{'brace-vertical-tightness-closing'},
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
# assume flag for '>' same as ')' for closing qw quotes
%closing_token_indentation = (
')' => $rOpts->{'closing-paren-indentation'},
'}' => $rOpts->{'closing-brace-indentation'},
']' => $rOpts->{'closing-square-bracket-indentation'},
'>' => $rOpts->{'closing-paren-indentation'},
);
# flag indicating if any closing tokens are indented
$some_closing_token_indentation =
$rOpts->{'closing-paren-indentation'}
|| $rOpts->{'closing-brace-indentation'}
|| $rOpts->{'closing-square-bracket-indentation'}
|| $rOpts->{'indent-closing-brace'};
%opening_token_right = (
'(' => $rOpts->{'opening-paren-right'},
'{' => $rOpts->{'opening-hash-brace-right'},
'[' => $rOpts->{'opening-square-bracket-right'},
);
%stack_opening_token = (
'(' => $rOpts->{'stack-opening-paren'},
'{' => $rOpts->{'stack-opening-hash-brace'},
'[' => $rOpts->{'stack-opening-square-bracket'},
);
%stack_closing_token = (
')' => $rOpts->{'stack-closing-paren'},
'}' => $rOpts->{'stack-closing-hash-brace'},
']' => $rOpts->{'stack-closing-square-bracket'},
);
return;
} ## end sub initialize_global_option_vars
sub initialize_line_length_vars {
# Create a table of maximum line length vs level for later efficient use.
# We will make the tables very long to be sure it will not be exceeded.
# But we have to choose a fixed length. A check will be made at the start
# of sub 'finish_formatting' to be sure it is not exceeded. Note, some of
# my standard test problems have indentation levels of about 150, so this
# should be fairly large. If the choice of a maximum level ever becomes
# an issue then these table values could be returned in a sub with a simple
# memoization scheme.
# Also create a table of the maximum spaces available for text due to the
# level only. If a line has continuation indentation, then that space must
# be subtracted from the table value. This table is used for preliminary
# estimates in welding, extended_ci, BBX, and marking short blocks.
use constant LEVEL_TABLE_MAX => 1000;
# The basic scheme:
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
my $indent = $level * $rOpts_indent_columns;
$maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
$maximum_text_length_at_level[$level] =
$rOpts_maximum_line_length - $indent;
}
# Correct the maximum_text_length table if the -wc=n flag is used
$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
if ($rOpts_whitespace_cycle) {
if ( $rOpts_whitespace_cycle > 0 ) {
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
my $level_mod = $level % $rOpts_whitespace_cycle;
my $indent = $level_mod * $rOpts_indent_columns;
$maximum_text_length_at_level[$level] =
$rOpts_maximum_line_length - $indent;
}
}
else {
$rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
}
}
# Correct the tables if the -vmll flag is used. These values override the
# previous values.
if ($rOpts_variable_maximum_line_length) {
foreach my $level ( 0 .. LEVEL_TABLE_MAX ) {
$maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
$maximum_line_length_at_level[$level] =
$rOpts_maximum_line_length + $level * $rOpts_indent_columns;
}
}
# Define two measures of indentation level, alpha and beta, at which some
# formatting features come under stress and need to start shutting down.
# Some combination of the two will be used to shut down different
# formatting features.
# Put a reasonable upper limit on stress level (say 100) in case the
# whitespace-cycle variable is used.
my $stress_level_limit = min( 100, LEVEL_TABLE_MAX );
# Find stress_level_alpha, targeted at very short maximum line lengths.
$stress_level_alpha = $stress_level_limit + 1;
foreach my $level_test ( 0 .. $stress_level_limit ) {
my $max_len = $maximum_text_length_at_level[ $level_test + 1 ];
my $excess_inside_space =
$max_len -
$rOpts_continuation_indentation -
$rOpts_indent_columns - 8;
if ( $excess_inside_space <= 0 ) {
$stress_level_alpha = $level_test;
last;
}
}
# Find stress level beta, a stress level targeted at formatting
# at deep levels near the maximum line length. We start increasing
# from zero and stop at the first level which shows no more space.
# 'const' is a fixed number of spaces for a typical variable.
# Cases b1197-b1204 work ok with const=12 but not with const=8
my $const = 16;
my $denom = max( 1, $rOpts_indent_columns );
$stress_level_beta = 0;
foreach my $level ( 0 .. $stress_level_limit ) {
my $remaining_cycles = max(
0,
(
$maximum_text_length_at_level[$level] -
$rOpts_continuation_indentation - $const
) / $denom
);
last if ( $remaining_cycles <= 3 ); # 2 does not work
$stress_level_beta = $level;
}
# This is a combined level which works well for turning off formatting
# features in most cases:
$high_stress_level = min( $stress_level_alpha, $stress_level_beta + 2 );
return;
} ## end sub initialize_line_length_vars
sub initialize_trailing_comma_break_rules {
# Setup control hash for making trailing comma breaks. Update c416.
# This sub is similar to 'sub initialize_trailing_comma_rules' but
# simpler.
# -btct=s, where s
#
# =" " none
# =0 : none
# =1 or * : all
# =m : break at trailing commas in multiline lists
# =b : break at bare trailing commas
%trailing_comma_break_rules = ();
my $rvalid_flags = [qw( 0 1 * m b )];
# Note that the hash keys are the CLOSING tokens but the input
# uses OPENING tokens.
my @all_keys = qw< ) ] } >;
my $option = $rOpts->{'break-at-trailing-comma-types'};
if ($option) {
$option =~ s/^\s+//;
$option =~ s/\s+$//;
}
# We need to use length() here because '0' is a possible option
if ( defined($option) && length($option) ) {
my $error_message;
my %rule_hash;
my @q = @{$rvalid_flags};
my %is_valid_flag;
@is_valid_flag{@q} = (1) x scalar(@q);
# handle the common case of a single control character, like -btct='b'
if ( length($option) == 1 ) {
# skip 0
if ($option) {
foreach my $key (@all_keys) {
$rule_hash{$key} = [ $option, EMPTY_STRING ];
}
}
}
# handle multi-character control(s), such as -btct='[m' or -btct='k(m'
else {
my @parts = split /\s+/, $option;
foreach my $part (@parts) {
my $part_input = $part;
# examples: b -b [b 0 * +f(b
# the letter value is the rightmost character
my $val = substr( $part, -1, 1 );
# skip 0
next unless ($val);
$part = substr( $part, 0, -1 );
if ( $val && !$is_valid_flag{$val} ) {
my $valid_str = join( SPACE, @{$rvalid_flags} );
$error_message .=
"In '$part_input': unexpected value '$val'; must be one of: $valid_str\n";
next;
}
# set defaults for this item
my @keys = @all_keys;
my $paren_flag = EMPTY_STRING;
# look for opening container bracket
my $is_paren;
if ( length($part) ) {
my $token = substr( $part, -1, 1 );
if ( $is_opening_token{$token} ) {
# note that the hash key is the closing token
my $key = $matching_token{$token};
@keys = ($key);
$part = substr( $part, 0, -1 );
$is_paren = $token eq '(';
}
}
# anything left must be a paren modifier
if ( length($part) ) {
$paren_flag = substr( $part, -1, 1 );
$part = substr( $part, 0, -1 );
if ( $paren_flag !~ /^[kKfFwW]$/ ) {
$error_message .=
"In '$part_input': Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
next;
}
if ( !$is_paren ) {
$error_message .=
"In '$part_input': paren flag '$paren_flag' is only allowed before a '('\n";
next;
}
}
if ( length($part) ) {
$error_message .= "Unrecognized term: '$part_input'\n";
next;
}
my $duplicate;
foreach my $key (@keys) {
if ( defined( $rule_hash{$key} ) ) {
$duplicate = 1;
}
$rule_hash{$key} = [ $val, $paren_flag ];
}
if ($duplicate) {
$error_message .=
"This term overlaps a previous term: '$part_input'\n";
}
}
}
# check for conflicting signed options
if ($error_message) {
Warn(<<EOM);
Error parsing --want-trailing-commas='$option':
$error_message
EOM
}
# Set the control hash if no errors
else {
%trailing_comma_break_rules = %rule_hash;
}
}
return;
} ## end sub initialize_trailing_comma_break_rules
sub initialize_trailing_comma_rules {
# Setup control hash for trailing commas
# -wtc=s defines desired trailing comma policy:
#
# =" " stable
# [ both -atc and -dtc ignored ]
# =0 : none
# [requires -dtc; -atc ignored]
# =1 or * : all
# [requires -atc; -dtc ignored]
# =m : multiline lists require trailing comma
# if -atc set => will add missing multiline trailing commas
# if -dtc set => will delete trailing single line commas
# =b or 'bare' (multiline) lists require trailing comma
# if -atc set => will add missing bare trailing commas
# if -dtc set => will delete non-bare trailing commas
# =h or 'hash': single column stable bare lists require trailing comma
# if -atc set will add these
# if -dtc set will delete other trailing commas
#-------------------------------------------------------------------
# Important:
# - This routine must be called after the alpha and beta stress levels
# have been defined in sub 'initialize_line_length_vars'.
# - and it must be called after sub 'initialize_trailing_comma_break_rules'
#-------------------------------------------------------------------
%trailing_comma_rules = ();
my $rvalid_flags = [qw( 0 1 * m b h i )];
# This hash shows i.e. that 'm' includes all 'b' includes all 'i' ...etc
# It is used to check for overlap when both + and - signs are used to
# cause adding and deleting of different types of trailing commas.
my %match_order = (
'1' => 0,
'*' => 0,
'm' => 1,
'b' => 2,
'i' => 3,
'h' => 4,
'0' => 5,
);
# Note that the hash keys are the CLOSING tokens but the input
# uses OPENING tokens.
my @all_keys = qw< ) ] } >;
my $option = $rOpts->{'want-trailing-commas'};
if ($option) {
$option =~ s/^\s+//;
$option =~ s/\s+$//;
}
# Pull out -btct paren flag for use in checking stability in marginal cases
my ( $tc_letter, $tc_paren_flag );
my $tc_paren_rule = $trailing_comma_break_rules{')'};
if ( defined($tc_paren_rule) ) {
( $tc_letter, $tc_paren_flag ) = @{$tc_paren_rule};
}
# We need to use length() here because '0' is a possible option
if ( defined($option) && length($option) ) {
my $error_message;
my %rule_hash;
my @q = @{$rvalid_flags};
my %is_valid_flag;
@is_valid_flag{@q} = (1) x scalar(@q);
# handle the common case of a single control character, like -wtc='b'
if ( length($option) == 1 ) {
foreach my $key (@all_keys) {
my $paren_flag = EMPTY_STRING;
my $stable = defined( $trailing_comma_break_rules{$key} );
if ( $key eq ')' ) { $stable &&= $paren_flag eq $tc_paren_flag }
$rule_hash{add}->{$key} = [ $option, $paren_flag, $stable ];
$rule_hash{delete}->{$key} = [ $option, $paren_flag, $stable ];
}
}
# handle multi-character control(s), such as -wtc='[m' or -wtc='k(m'
else {
my @parts = split /\s+/, $option;
foreach my $part (@parts) {
my $part_input = $part;
# examples: b -b [b 0 * +f(b
# the letter value is the rightmost character
my $val = substr( $part, -1, 1 );
$part = substr( $part, 0, -1 );
if ( $val && !$is_valid_flag{$val} ) {
my $valid_str = join( SPACE, @{$rvalid_flags} );
$error_message .=
"In '$part_input': unexpected value '$val'; must be one of: $valid_str\n";
next;
}
# set defaults for this item
my @signs = qw( add delete );
my @keys = @all_keys;
my $paren_flag = EMPTY_STRING;
# look for opening container bracket
my $is_paren;
if ( length($part) ) {
my $token = substr( $part, -1, 1 );
if ( $is_opening_token{$token} ) {
# note that the hash key is the closing token
my $key = $matching_token{$token};
@keys = ($key);
$part = substr( $part, 0, -1 );
$is_paren = $token eq '(';
}
}
# look for a leading sign, + or -
if ( length($part) ) {
my $sign = substr( $part, 0, 1 );
if ( $sign eq '+' ) {
@signs = qw(add);
$part = substr( $part, 1 );
}
elsif ( $sign eq '-' ) {
@signs = qw(delete);
$part = substr( $part, 1 );
}
else {
## keep defaults
}
}
# anything left must be a paren modifier
if ( length($part) ) {
$paren_flag = substr( $part, -1, 1 );
$part = substr( $part, 0, -1 );
if ( $paren_flag !~ /^[kKfFwW]$/ ) {
$error_message .=
"In '$part_input': Unexpected paren flag '$paren_flag'; must be one of: k K f F w W\n";
next;
}
if ( !$is_paren ) {
$error_message .=
"In '$part_input': paren flag '$paren_flag' is only allowed before a '('\n";
next;
}
}
if ( length($part) ) {
$error_message .= "Unrecognized term: '$part_input'\n";
next;
}
my $duplicate;
foreach my $sign (@signs) {
foreach my $key (@keys) {
# New bare commas are stable if -bctc is set, and
# also paren flags do not disagree
my $stable =
defined( $trailing_comma_break_rules{$key} );
if ( $key eq ')' ) {
$stable &&= $paren_flag eq $tc_paren_flag;
}
if ( defined( $rule_hash{$sign}->{$key} ) ) {
$duplicate &&= 1;
}
$rule_hash{$sign}->{$key} =
[ $val, $paren_flag, $stable ];
}
}
if ($duplicate) {
$error_message .=
"This term overlaps a previous term: '$part_input'\n";
}
}
}
# check for conflicting signed options
if ( !$error_message ) {
my $radd = $rule_hash{add};
my $rdelete = $rule_hash{delete};
if ( defined($radd) && defined($rdelete) ) {
foreach my $key (@all_keys) {
my $radd_info = $radd->{$key};
my $rdelete_info = $rdelete->{$key};
if ( defined($radd_info) && defined($rdelete_info) ) {
my $add_val = $radd_info->[0];
my $delete_val = $rdelete_info->[0];
next if ( $add_val eq $delete_val );
my $add_order = $match_order{$add_val};
my $delete_order = $match_order{$delete_val};
if ( !defined($add_order) ) {
## should have been caught earlier
DEVEL_MODE
&& Fault("unexpected + value $add_val\n");
next;
}
if ( !defined($delete_order) ) {
## should have been caught earlier
DEVEL_MODE
&& Fault("unexpected - value $delete_val\n");
next;
}
if ( $add_order <= $delete_order ) {
my $token = $matching_token{$key};
$error_message .=
"For token '$token': the range for '+$add_val' overlaps the range for '-$delete_val'\n";
}
}
}
}
}
if ($error_message) {
Warn(<<EOM);
Error parsing --want-trailing-commas='$option':
$error_message
EOM
}
# Set the control hash if no errors
else {
%trailing_comma_rules = %rule_hash;
}
}
# Both adding and deleting commas can lead to instability in extreme cases
if ( $rOpts_add_trailing_commas && $rOpts_delete_trailing_commas ) {
# If the possible instability is significant, then we can turn off
# -dtc as a defensive measure to prevent it.
# We must turn off -dtc for very small values of --whitespace-cycle
# to avoid instability. A minimum value of -wc=3 fixes b1393, but a
# value of 4 is used here for safety. This parameter is seldom used,
# and much larger than this when used, so the cutoff value is not
# critical.
if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle <= 4 ) {
$rOpts_delete_trailing_commas = 0;
}
}
return;
} ## end sub initialize_trailing_comma_rules
sub initialize_interbracket_arrow_style {
# Setup hash for desired arrow style
%interbracket_arrow_style = ();
# and check other parameters for conflicts
my $name_add = 'add-interbracket-arrows';
my $name_delete = 'delete-interbracket-arrows';
my $name_style = 'interbracket-arrow-style';
my $opt_add = $rOpts->{$name_add};
my $opt_delete = $rOpts->{$name_delete};
my $opt_style = $rOpts->{$name_style};
if ( $opt_add && $opt_delete && !$opt_style ) {
Die(<<EOM);
Cannot use both --$name_add and --$name_delete
unless --$name_style is defined
EOM
}
return unless defined($opt_style);
$opt_style =~ tr/,/ /;
$opt_style =~ s/^\s+//;
$opt_style =~ s/\s+$//;
return unless length($opt_style);
if ( $opt_style eq '0' ) { $opt_style = '] [ ] { } [ } {' }
elsif ( $opt_style eq '1' ) { $opt_style = ']->[ ]->{ }->[ }->{' }
elsif ( $opt_style eq '*' ) { $opt_style = ']->[ ]->{ }->[ }->{' }
else { }
# We are walking along a string such as
# $opt_style=" ][ ]->{ }->[ }{ ";
# ignoring spaces and looking for bracket pairs with optional
# arrow like: '][' or ]->{ or }->[ or }{
# The two bracket characters are the hash key and the hash value
# is 1 for an arrow and -1 for no arrow.
# $ch1 will hold most recent closing bracket
# $ch2 will hold a '->' if seen
my %rule_hash;
my ( $ch1, $ch2 );
my $err_msg;
my $pos_last;
while (1) {
$pos_last = pos($opt_style);
if (
$opt_style =~ m{
\G(?: # fix git #142
(\s+) # 1. whitespace
| ([\}\]]) # 2. closing bracket
| (->) # 3. arrow
| ([\[\{]) # 4. opening bracket
| (.*) # 5. something else, error
)
}gcx
)
{
if ($1) { next }
if ($2) {
if ( !$ch1 ) { $ch1 = $2 }
else { $err_msg = "unexpected '$2'"; last }
next;
}
if ($3) {
if ($ch1) { $ch2 = $3 }
else { $err_msg = "unexpected '$3'"; last }
next;
}
if ($4) {
if ( $ch1 || $ch2 ) {
my $key = $ch1 . $4;
if ( !defined( $rule_hash{$key} ) ) {
$rule_hash{$key} = $ch2 ? 1 : -1;
}
else { $err_msg = "multiple copies for '$key'"; last; }
$ch1 = $ch2 = undef;
}
else { $err_msg = "unexpected '$4'"; last }
next;
}
if ($5) {
my $bad = $5;
if ( length($bad) > 10 ) {
$bad = substr( $bad, 0, 10 ) . '...';
}
$err_msg = "confused at: '$bad'\n";
last;
}
}
# that's all..
else {
last;
}
} ## end while (1)
if ($err_msg) {
my $msg;
if ( $pos_last && length($opt_style) < 20 ) {
$msg = $opt_style . "\n" . SPACE x $pos_last . '^' . "\n";
}
$msg .= "Error parsing --$name_style: $err_msg";
Die($msg);
}
# Copy the rule hash, converting braces to token types
foreach my $key ( keys %rule_hash ) {
my $key_fix = $key;
$key_fix =~ tr/{}/LR/;
$interbracket_arrow_style{$key_fix} = $rule_hash{$key};
}
return;
} ## end sub initialize_interbracket_arrow_style
sub initialize_whitespace_hashes {
# This is called once before formatting begins to initialize these global
# hashes, which control the use of whitespace around tokens:
#
# %binary_ws_rules
# %want_left_space
# %want_right_space
# %space_after_keyword
#
# Many token types are identical to the tokens themselves.
# See the tokenizer for a complete list. Here are some special types:
# k = perl keyword
# f = semicolon in for statement
# m = unary minus
# p = unary plus
# Note that :: is excluded since it should be contained in an identifier
# Note that '->' is excluded because it never gets space
# parentheses and brackets are excluded since they are handled specially
# curly braces are included but may be overridden by logic, such as
# newline logic.
# NEW_TOKENS: create a whitespace rule here. This can be as
# simple as adding your new letter to @spaces_both_sides, for
# example.
# fix for c250: added space rules new package type 'P' and sub type 'S'
my @spaces_both_sides = qw#
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
**= &&= ||= //= <=> A k f w F n C Y U G v P S ^^
#;
my @spaces_left_side = qw< t ! ~ m p { \ h pp mm Z j >;
push( @spaces_left_side, '#' ); # avoids warning message
# c349: moved **= from @spaces_right_side to @spaces_both_sides
my @spaces_right_side = qw< ; } ) ] R J ++ -- >;
push( @spaces_right_side, ',' ); # avoids warning message
%want_left_space = ();
%want_right_space = ();
%binary_ws_rules = ();
# Note that we setting defaults here. Later in processing
# the values of %want_left_space and %want_right_space
# may be overridden by any user settings specified by the
# -wls and -wrs parameters. However the binary_whitespace_rules
# are hardwired and have priority.
@want_left_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_right_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_left_space{@spaces_left_side} =
(1) x scalar(@spaces_left_side);
@want_right_space{@spaces_left_side} =
(-1) x scalar(@spaces_left_side);
@want_left_space{@spaces_right_side} =
(-1) x scalar(@spaces_right_side);
@want_right_space{@spaces_right_side} =
(1) x scalar(@spaces_right_side);
$want_left_space{'->'} = WS_NO;
$want_right_space{'->'} = WS_NO;
$want_left_space{'**'} = WS_NO;
$want_right_space{'**'} = WS_NO;
$want_right_space{'CORE::'} = WS_NO;
# These binary_ws_rules are hardwired and have priority over the above
# settings. It would be nice to allow adjustment by the user,
# but it would be complicated to specify.
#
# hash type information must stay tightly bound
# as in : ${xxxx}
$binary_ws_rules{'i'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'{'} = WS_YES;
$binary_ws_rules{'k'}{'{'} = WS_YES;
$binary_ws_rules{'U'}{'{'} = WS_YES;
$binary_ws_rules{'i'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'L'} = WS_NO;
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'='} = WS_OPTIONAL; # for signatures; fixes b1123
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_OPTIONAL; # RT#129850; was WS_NO
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'$'}{'{'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'{'} = WS_NO;
$binary_ws_rules{'='}{'L'} = WS_YES;
$binary_ws_rules{'J'}{'J'} = WS_YES;
# the following includes ') {'
# as in : if ( xxx ) { yyy }
$binary_ws_rules{']'}{'L'} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{')'}{'{'} = WS_YES;
$binary_ws_rules{')'}{'['} = WS_NO;
$binary_ws_rules{']'}{'['} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'['} = WS_NO;
$binary_ws_rules{']'}{'++'} = WS_NO;
$binary_ws_rules{']'}{'--'} = WS_NO;
$binary_ws_rules{')'}{'++'} = WS_NO;
$binary_ws_rules{')'}{'--'} = WS_NO;
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'{'} = WS_YES;
# user controls
if ( !$rOpts->{'space-for-semicolon'} ) {
$want_left_space{'f'} = -1;
}
if ( $rOpts->{'space-terminal-semicolon'} ) {
$want_left_space{';'} = 1;
}
# implement user whitespace preferences
if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@q} = (1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@q} = (-1) x scalar(@q);
}
if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@q} = (-1) x scalar(@q);
}
return;
} ## end sub initialize_whitespace_hashes
{ #<<< begin closure set_whitespace_flags
my %is_special_ws_type;
my %is_wCUG;
my %is_wi;
BEGIN {
# The following hash is used to skip over needless if tests.
# Be sure to update it when adding new checks in its block.
my @q = qw( k w C m - Q );
push @q, '#';
@is_special_ws_type{@q} = (1) x scalar(@q);
# These hashes replace slower regex tests
@q = qw( w C U G );
@is_wCUG{@q} = (1) x scalar(@q);
@q = qw( w i );
@is_wi{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_WHITE => 0;
# Hashes to set spaces around container tokens according to their
# sequence numbers. These are set as keywords are examined.
# They are controlled by the -kpit and -kpitl flags.
my %opening_container_inside_ws;
my %closing_container_inside_ws;
sub set_whitespace_flags {
my $self = shift;
# This routine is called once per file to set whitespace flags for that
# file. This routine examines each pair of nonblank tokens and sets a flag
# indicating if they should be separated by white space.
#
# $rwhitespace_flags->[$j] is a flag indicating whether a white space
# BEFORE token $j is needed, with the following values:
#
# WS_NO = -1 do not want a space BEFORE token $j
# WS_OPTIONAL= 0 optional space or $j is a whitespace
# WS_YES = 1 want a space BEFORE token $j
#
my $j_tight_closing_paren = -1;
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
my $jmax = @{$rLL} - 1;
my $rtightness_override_by_seqno = $self->[_rtightness_override_by_seqno_];
%opening_container_inside_ws = ();
%closing_container_inside_ws = ();
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
my $rOpts_space_function_paren = $rOpts->{'space-function-paren'};
my $rwhitespace_flags = [];
my $ris_function_call_paren = {};
return $rwhitespace_flags if ( $jmax < 0 );
# function to return $ws for a signature paren following a sub
my $ws_signature_paren = sub {
my ($jj) = @_;
my $ws;
if ( $rOpts_space_signature_paren == 1 ) {
# is the previous token a blank?
my $have_blank = $rLL->[ $jj - 1 ]->[_TYPE_] eq 'b';
# or a newline?
$have_blank ||=
$rLL->[$jj]->[_LINE_INDEX_] != $rLL->[ $jj - 1 ]->[_LINE_INDEX_];
$ws = $have_blank ? WS_YES : WS_NO;
}
else {
$ws = $rOpts_space_signature_paren == 0 ? WS_NO : WS_YES;
}
return $ws;
}; ## end $ws_signature_paren = sub
my $last_token = SPACE;
my $last_type = 'b';
my $last_token_dbg = SPACE;
my $last_type_dbg = 'b';
my $rtokh_last = [ @{ $rLL->[0] } ];
$rtokh_last->[_TOKEN_] = $last_token;
$rtokh_last->[_TYPE_] = $last_type;
$rtokh_last->[_TYPE_SEQUENCE_] = EMPTY_STRING;
$rtokh_last->[_LINE_INDEX_] = 0;
my $rtokh_last_last = $rtokh_last;
# This will identify braces to be treated as blocks for the -xbt flag
my %block_type_for_tightness;
my ( $ws_1, $ws_2, $ws_3, $ws_4 );
# main loop over all tokens to define the whitespace flags
my $last_type_is_opening;
my $j = -1;
my $type;
foreach my $rtokh ( @{$rLL} ) {
$j++;
if ( ( $type = $rtokh->[_TYPE_] ) eq 'b' ) {
$rwhitespace_flags->[$j] = WS_OPTIONAL;
next;
}
my $token = $rtokh->[_TOKEN_];
my $ws;
#---------------------------------------------------------------
# Whitespace Rules Section 1:
# Handle space on the inside of opening braces.
#---------------------------------------------------------------
if ($last_type_is_opening) {
$last_type_is_opening = 0;
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
my $block_type = $rblock_type_of_seqno->{$seqno};
my $last_seqno = $rtokh_last->[_TYPE_SEQUENCE_];
my $last_block_type = $rblock_type_of_seqno->{$last_seqno}
|| $block_type_for_tightness{$last_seqno};
$j_tight_closing_paren = -1;
# let us keep empty matched braces together: () {} []
# except for BLOCKS
if ( $token eq $matching_token{$last_token} ) {
if ($block_type) {
$ws = WS_YES;
}
else {
$ws = WS_NO;
}
}
else {
# we're considering the right of an opening brace
# tightness = 0 means always pad inside with space
# tightness = 1 means pad inside if "complex"
# tightness = 2 means never pad inside with space
my $tightness_here;
if ( $last_block_type && $last_token eq '{' ) {
$tightness_here = $rOpts_block_brace_tightness;
}
else { $tightness_here = $tightness{$last_token} }
#=============================================================
# Patch for test problem <<snippets/fabrice_bug.in>>
# We must always avoid spaces around a bare word beginning
# with ^ as in:
# my $before = ${^PREMATCH};
# Because all of the following cause an error in perl:
# my $before = ${ ^PREMATCH };
# my $before = ${ ^PREMATCH};
# my $before = ${^PREMATCH };
# So if brace tightness flag is -bt=0 we must temporarily reset
# to bt=1. Note that here we must set tightness=1 and not 2 so
# that the closing space is also avoided
# (via the $j_tight_closing_paren flag in coding)
if ( $type eq 'w' && substr( $token, 0, 1 ) eq '^' ) {
$tightness_here = 1;
}
# c446
my $tseq = $rtightness_override_by_seqno->{$last_seqno};
if ( defined($tseq) ) { $tightness_here = $tseq }
#=============================================================
if ( $tightness_here <= 0 ) {
$ws = WS_YES;
}
elsif ( $tightness_here > 1 ) {
$ws = WS_NO;
}
# Default (tightness = 1) depends on the container token count
else {
# Find the index of the closing token
my $j_closing = $K_closing_container->{$last_seqno};
# Certain token types can be counted as multiple tokens for
# the default tightness. The meaning of hash values is:
# 1 => match this token type
# otherwise it is a regex; match if token matches regex
my $regex = $multiple_token_tightness{$type};
if ( $regex
&& ( length($regex) == 1 || $token =~ /$regex/ ) )
{
$ws = WS_YES;
}
# If the closing token is less than five characters ahead
# we must take a closer look
elsif ( defined($j_closing)
&& $j_closing - $j < 5
&& $rLL->[$j_closing]->[_TYPE_SEQUENCE_] eq
$last_seqno )
{
# quick check
if ( $j + 1 >= $j_closing ) {
$ws = WS_NO;
$j_tight_closing_paren = $j_closing;
}
# slow check
else {
$ws =
ws_in_container( $j, $j_closing, $rLL, $type,
$token, $last_token );
if ( $ws == WS_NO ) {
$j_tight_closing_paren = $j_closing;
}
}
}
else {
$ws = WS_YES;
}
}
}
# check for special cases which override the above rules
if ( %opening_container_inside_ws && $last_seqno ) {
my $ws_override = $opening_container_inside_ws{$last_seqno};
if ($ws_override) { $ws = $ws_override }
}
$ws_4 = $ws_3 = $ws_2 = $ws_1 = $ws
if DEBUG_WHITE;
} ## end setting space flag inside opening tokens
#---------------------------------------------------------------
# Whitespace Rules Section 2:
# Special checks for certain types ...
#---------------------------------------------------------------
# The hash '%is_special_ws_type' significantly speeds up this routine,
# but be sure to update it if a new check is added.
# Currently has types: qw(k w C m - Q #)
if ( $is_special_ws_type{$type} ) {
if ( $type eq 'k' ) {
# Keywords 'for', 'foreach' are special cases for -kpit since
# the opening paren does not always immediately follow the
# keyword. So we have to search forward for the paren in this
# case. I have limited the search to 10 tokens ahead, just in
# case somebody has a big file and no opening paren. This
# should be enough for all normal code. Added the level check
# to fix b1236.
if ( $is_for_foreach{$token}
&& %keyword_paren_inner_tightness
&& defined( $keyword_paren_inner_tightness{$token} )
&& $j < $jmax )
{
my $level = $rLL->[$j]->[_LEVEL_];
## NOTE: we might use the KNEXT variable to avoid this loop
## but profiling shows that little would be saved
foreach my $jp ( $j + 1 .. $j + 9 ) {
last if ( $jp > $jmax );
last if ( $rLL->[$jp]->[_LEVEL_] != $level ); # b1236
next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
my $seqno_p = $rLL->[$jp]->[_TYPE_SEQUENCE_];
set_container_ws_by_keyword( $token, $seqno_p );
last;
}
}
}
# handle a comment
elsif ( $type eq '#' ) {
# newline before block comment ($j==0), and
# space before side comment ($j>0), so ..
$ws = WS_YES;
#---------------------------------
# Nothing more to do for a comment
#---------------------------------
$rwhitespace_flags->[$j] = $ws;
next;
}
# space_backslash_quote; RT #123774 <<snippets/rt123774.in>>
# allow a space between a backslash and single or double quote
# to avoid fooling html formatters
elsif ( $type eq 'Q' ) {
if ( $last_type eq '\\' && $token =~ /^[\"\']/ ) {
$ws =
!$rOpts_space_backslash_quote ? WS_NO
: $rOpts_space_backslash_quote == 1 ? WS_OPTIONAL
: $rOpts_space_backslash_quote == 2 ? WS_YES
: WS_YES;
}
}
# retain any space between '-' and bare word
elsif ( $type eq 'w' || $type eq 'C' ) {
$ws = WS_OPTIONAL if $last_type eq '-';
}
# retain any space between '-' and bare word; for example
# avoid space between 'USER' and '-' here: <<snippets/space2.in>>
# $myhash{USER-NAME}='steve';
elsif ( $type eq 'm' || $type eq '-' ) {
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
else {
# A type $type was entered in %is_special_ws_type but
# there is no code block to handle it. Either remove it
# from the hash or add a code block to handle it.
DEVEL_MODE && Fault("no code to handle type $type\n");
}
} ## end elsif ( $is_special_ws_type{$type} ...
#---------------------------------------------------------------
# Whitespace Rules Section 3:
# Handle space on inside of closing brace pairs.
#---------------------------------------------------------------
# /[\}\)\]R]/
elsif ( $is_closing_type{$type} ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
$ws = WS_NO;
}
else {
if ( !defined($ws) ) {
my $tightness_here;
my $block_type = $rblock_type_of_seqno->{$seqno}
|| $block_type_for_tightness{$seqno};
if ( $block_type && $token eq '}' ) {
$tightness_here = $rOpts_block_brace_tightness;
}
else { $tightness_here = $tightness{$token} }
$ws = ( $tightness_here > 1 ) ? WS_NO : WS_YES;
}
}
# check for special cases which override the above rules
if ( %closing_container_inside_ws && $seqno ) {
my $ws_override = $closing_container_inside_ws{$seqno};
if ($ws_override) { $ws = $ws_override }
}
# c446
my $tseq = $rtightness_override_by_seqno->{$seqno};
if ( defined($tseq) ) { $ws = $tseq > 0 ? WS_NO : WS_YES }
$ws_4 = $ws_3 = $ws_2 = $ws
if DEBUG_WHITE;
} ## end setting space flag inside closing tokens
#---------------------------------------------------------------
# Whitespace Rules Section 4:
#---------------------------------------------------------------
elsif ( $is_opening_type{$type} ) {
$last_type_is_opening = 1;
if ( $token eq '(' ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
# This will have to be tweaked as tokenization changes.
# We usually want a space at '} (', for example:
# <<snippets/space1.in>>
# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
#
# But not others:
# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
# At present, the above & block is marked as type L/R so this
# case won't go through here.
if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
# NOTE: some older versions of Perl had occasional problems if
# spaces are introduced between keywords or functions and
# opening parens. So the default is not to do this except is
# certain cases. The current Perl seems to tolerate spaces.
# Space between keyword and '('
elsif ( $last_type eq 'k' ) {
if ( $last_token eq 'sub' ) {
$ws = $ws_signature_paren->($j);
}
else {
$ws = WS_NO
unless ( $rOpts_space_keyword_paren
|| $space_after_keyword{$last_token} );
# Set inside space flag if requested
set_container_ws_by_keyword( $last_token, $seqno );
}
}
# Space between function and '('
# -----------------------------------------------------
# 'w' and 'i' checks for something like:
# myfun( &myfun( ->myfun(
# -----------------------------------------------------
# Note that at this point an identifier may still have a
# leading arrow, but the arrow will be split off during token
# respacing. After that, the token may become a bare word
# without leading arrow. The point is, it is best to mark
# function call parens right here before that happens.
# Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
# NOTE: this would be the place to allow spaces between
# repeated parens, like () () (), as in case c017, but I
# decided that would not be a good idea.
# Updated to allow detached '->' from tokenizer (issue c140)
elsif (
# /^[wCUG]$/
$is_wCUG{$last_type}
|| (
# /^[wi]$/
$is_wi{$last_type}
&& (
# with prefix '->' or '&'
$last_token =~ /^([\&]|->)/
# or preceding token '->' (see b1337; c140)
|| $rtokh_last_last->[_TYPE_] eq '->'
# or preceding sub call operator token '&'
|| ( $rtokh_last_last->[_TYPE_] eq 't'
&& $rtokh_last_last->[_TOKEN_] =~ /^\&\s*$/ )
)
)
)
{
$ws =
$rOpts_space_function_paren
? $self->ws_space_function_paren( $rtokh_last,
$rtokh_last_last )
: WS_NO;
# Note that this does not include functions called
# with '->(', so that case has to be handled separately
set_container_ws_by_keyword( $last_token, $seqno );
$ris_function_call_paren->{$seqno} = 1;
}
# space between something like $i and ( in 'snippets/space2.in'
# for $i ( 0 .. 20 ) {
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
}
# allow constant function followed by '()' to retain no space
elsif ($last_type eq 'C'
&& $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
{
$ws = WS_NO;
}
# a paren after a sub definition starts signature
elsif ( $last_type eq 'S' ) {
$ws = $ws_signature_paren->($j);
}
else {
# no special rule for this opening paren type
}
}
# patch for SWITCH/CASE: make space at ']{' optional
# since the '{' might begin a case or when block
elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
$ws = WS_OPTIONAL;
}
else {
# opening type not covered by a special rule
}
# keep space between 'sub' and '{' for anonymous sub definition,
# be sure type = 'k' (added for c140)
if ( $type eq '{' ) {
if ( $last_token eq 'sub'
&& $last_type eq 'k'
&& $token ne '(' )
{
$ws = WS_YES;
}
# this is needed to avoid no space in '){'
if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
# avoid any space before the brace or bracket in something like
# @opts{'a','b',...}
if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
$ws = WS_NO;
}
}
# The --extended-block-tightness option allows certain braces
# to be treated as blocks just for setting inner whitespace
if ( $rOpts_extended_block_tightness && $token eq '{' ) {
my $seqno = $rtokh->[_TYPE_SEQUENCE_];
if ( !$rblock_type_of_seqno->{$seqno}
&& $extended_block_tightness_list{$last_token} )
{
# Ok - make this brace a block type for tightness only
$block_type_for_tightness{$seqno} = $last_token;
}
}
} ## end elsif ( $is_opening_type{$type} ) {
else {
# $type not opening, closing, or covered by a special rule
}
# always preserve whatever space was used after a possible
# filehandle (except _)
if ( $last_type eq 'Z' && $last_token ne '_' ) {
# no space for '$ {' even if '$' is marked as type 'Z', issue c221
# note: redundant check on type 'h' here removed for c419 part 2b
if ( $last_type eq 'Z' && $last_token eq '$' && $token eq '{' ) {
$ws = WS_NO;
}
else {
$ws = WS_OPTIONAL;
}
}
$ws_4 = $ws_3 = $ws
if DEBUG_WHITE;
if ( !defined($ws) ) {
#---------------------------------------------------------------
# Whitespace Rules Section 4:
# Use the binary rule table.
#---------------------------------------------------------------
if ( defined( $binary_ws_rules{$last_type}{$type} ) ) {
$ws = $binary_ws_rules{$last_type}{$type};
$ws_4 = $ws if DEBUG_WHITE;
}
#---------------------------------------------------------------
# Whitespace Rules Section 5:
# Apply default rules not covered above.
#---------------------------------------------------------------
# If we fall through to here, look at the pre-defined hash tables
# for the two tokens, and:
# if (they are equal) use the common value
# if (either is zero or undef) use the other
# if (either is -1) use it
# That is,
# left vs right
# 1 vs 1 --> 1
# 0 vs 0 --> 0
# -1 vs -1 --> -1
#
# 0 vs -1 --> -1
# 0 vs 1 --> 1
# 1 vs 0 --> 1
# -1 vs 0 --> -1
#
# -1 vs 1 --> -1
# 1 vs -1 --> -1
else {
my $wl = $want_left_space{$type};
my $wr = $want_right_space{$last_type};
if ( !defined($wl) ) {
$ws = defined($wr) ? $wr : 0;
}
elsif ( !defined($wr) ) {
$ws = $wl;
}
else {
$ws =
( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
}
}
}
# Treat newline as a whitespace. Otherwise, we might combine
# 'Send' and '-recipients' here according to the above rules:
# <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
if ( !$ws
&& $rtokh->[_LINE_INDEX_] != $rtokh_last->[_LINE_INDEX_] )
{
$ws = WS_YES;
}
# -qwaf phantom commas require space before type 'Q'
# See similar patch in sub is_essential_whitespace
if ( !$last_token
&& $last_type eq ','
&& $type eq 'Q'
&& $rOpts_qw_as_function )
{
$ws = 1;
}
$rwhitespace_flags->[$j] = $ws;
# remember non-blank, non-comment tokens
$last_token = $token;
$last_type = $type;
$rtokh_last_last = $rtokh_last;
$rtokh_last = $rtokh;
# Programming note: for some reason, it is very much faster to 'next'
# out of this loop here than to put the DEBUG coding in a block.
# But note that the debug code must then update its own copies
# of $last_token and $last_type.
next if ( !DEBUG_WHITE );
my $str = substr( $last_token_dbg, 0, 15 );
$str .= SPACE x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_2) ) { $ws_2 = "*" }
if ( !defined($ws_3) ) { $ws_3 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print {*STDOUT}
"NEW WHITE: i=$j $str $last_type_dbg $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
# reset for next pass
$ws_1 = $ws_2 = $ws_3 = $ws_4 = undef;
$last_token_dbg = $token;
$last_type_dbg = $type;
} ## end main loop
if ( $rOpts->{'tight-secret-operators'} ) {
$self->new_secret_operator_whitespace($rwhitespace_flags);
}
$self->[_ris_function_call_paren_] = $ris_function_call_paren;
return $rwhitespace_flags;
} ## end sub set_whitespace_flags
sub set_container_ws_by_keyword {
my ( $word, $sequence_number ) = @_;
return unless (%keyword_paren_inner_tightness);
# We just saw a keyword (or other function name) followed by an opening
# paren. Now check to see if the following paren should have special
# treatment for its inside space. If so we set a hash value using the
# sequence number as key.
if ( $word && $sequence_number ) {
my $tightness_here = $keyword_paren_inner_tightness{$word};
if ( defined($tightness_here) && $tightness_here != 1 ) {
my $ws_flag = $tightness_here == 0 ? WS_YES : WS_NO;
$opening_container_inside_ws{$sequence_number} = $ws_flag;
$closing_container_inside_ws{$sequence_number} = $ws_flag;
}
}
else {
DEVEL_MODE
&& Fault("unexpected token='$word' and seqno='$sequence_number'\n");
}
return;
} ## end sub set_container_ws_by_keyword
sub ws_in_container {
my ( $j, $j_closing, $rLL, $type, $token, $last_token ) = @_;
# Given:
# $j = index of token following an opening container token
# $type, $token = the type and token at index $j
# $j_closing = closing token of the container
# $last_token = the opening token of the container
# Return:
# WS_NO if there is just one token in the container (with exceptions)
# WS_YES otherwise
# quick check
if ( $j + 1 >= $j_closing ) { return WS_NO }
# special cases...
# Count '-foo' as single token so that each of
# $a{-foo} and $a{foo} and $a{'foo'}
# do not get spaces with default formatting.
my $j_here = $j;
++$j_here
if ( $token eq '-'
&& $last_token eq '{'
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
# Count a sign separated from a number as a single token, as in the
# following line. Otherwise, it takes two steps to converge:
# deg2rad(- 0.5)
if ( ( $type eq 'm' || $type eq 'p' )
&& $j < $j_closing + 1
&& $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
&& $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
&& $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
{
$j_here = $j + 2;
}
# recheck..
if ( $j_here + 1 >= $j_closing ) { return WS_NO }
# check for a blank after the first token
my $j_next =
( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
? $j_here + 2
: $j_here + 1;
return $j_next == $j_closing ? WS_NO : WS_YES;
} ## end sub ws_in_container
sub ws_space_function_paren {
my ( $self, $rtokh_last, $rtokh_last_last ) = @_;
# Called if --space-function-paren is set to see if it might cause
# a problem. The manual warns the user about potential problems with
# this flag. Here we just try to catch one common problem.
# Given:
# $j = index of '(' after function name
# Return:
# WS_NO if no space
# WS_YES otherwise
# This was added to fix for issue c166. Ignore -sfp at a possible indirect
# object location. For example, do not convert this:
# print header() ...
# to this:
# print header () ...
# because in this latter form, header may be taken to be a file handle
# instead of a function call.
# Start with the normal value for -sfp:
my $ws = WS_YES;
# now check to be sure we don't cause a problem:
my $type_ll = $rtokh_last_last->[_TYPE_];
my $tok_ll = $rtokh_last_last->[_TOKEN_];
# NOTE: this is just a minimal check. For example, we might also check
# for something like this:
# print ( header ( ..
if ( $type_ll eq 'k' && $is_indirect_object_taker{$tok_ll} ) {
$ws = WS_NO;
}
# do not let -sfp add space for qw's converted to functions by -qwaf
if ( $rOpts_qw_as_function
&& $rtokh_last->[_TYPE_] eq 'U'
&& $rtokh_last->[_TOKEN_] eq 'qw' )
{
$ws = WS_NO;
}
return $ws;
} ## end sub ws_space_function_paren
} ## end closure set_whitespace_flags
sub dump_want_left_space {
my $fh = shift;
local $LIST_SEPARATOR = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_left_space ) {
$fh->print("$key\t$want_left_space{$key}\n");
}
return;
} ## end sub dump_want_left_space
sub dump_want_right_space {
my $fh = shift;
local $LIST_SEPARATOR = "\n";
$fh->print(<<EOM);
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
foreach my $key ( sort keys %want_right_space ) {
$fh->print("$key\t$want_right_space{$key}\n");
}
return;
} ## end sub dump_want_right_space
{ ## begin closure is_essential_whitespace
my %is_sort_grep_map;
my %is_digraph;
my %is_trigraph;
my %essential_whitespace_filter_l1;
my %essential_whitespace_filter_r1;
my %essential_whitespace_filter_l2;
my %essential_whitespace_filter_r2;
my %is_type_with_space_before_bareword;
my %is_special_variable_char;
my %is_digit_char;
BEGIN {
my @q;
# NOTE: This hash is like the global %is_sort_map_grep, but it ignores
# grep aliases on purpose, since here we are looking parens, not braces
@q = qw( sort grep map );
@is_sort_grep_map{@q} = (1) x scalar(@q);
@q = qw{
.. :: << >> ** && || // -> => += -=
.= %= &= |= ^= *= <> <= >= == =~ !~
!= ++ -- /= x= ~~ ~. |. &. ^. ^^
};
@is_digraph{@q} = (1) x scalar(@q);
@q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ );
@is_trigraph{@q} = (1) x scalar(@q);
# These are used as a speedup filters for sub is_essential_whitespace.
# Filter 1:
# These left side token types USUALLY do not require a space:
@q = qw( ; { } [ ] L R );
push @q, ',';
push @q, ')';
push @q, '(';
@essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
# BUT some might if followed by these right token types
@q = qw( pp mm << <<= h );
@essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
# Filter 2:
# These right side filters usually do not require a space
@q = qw( ; ] R } );
push @q, ',';
push @q, ')';
@essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
# BUT some might if followed by these left token types
@q = qw( h Z );
@essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
# Keep a space between certain types and any bareword:
# Q: keep a space between a quote and a bareword to prevent the
# bareword from becoming a quote modifier.
# &: do not remove space between an '&' and a bare word because
# it may turn into a function evaluation, like here
# between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
# $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
@q = qw( Q & );
@is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
# These are the only characters which can (currently) form special
# variables, like $^W: (issue c066, c068).
@q =
qw{ ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ };
@is_special_variable_char{@q} = (1) x scalar(@q);
@q = qw( 0 1 2 3 4 5 6 7 8 9 );
@is_digit_char{@q} = (1) x scalar(@q);
} ## end BEGIN
sub is_essential_whitespace {
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
# Essential whitespace means whitespace which cannot be safely deleted
# without risking the introduction of a syntax error.
# Given: three tokens and their types:
# ($tokenll, $typell) = previous nonblank token to the left of $tokenl
# ($tokenl, $typel) = the token to the left of the space in question
# ($tokenr, $typer) = the token to the right of the space in question
# Return:
# true if whitespace is needed
# false if whitespace may be deleted
#
# Note1: This routine should almost never need to be changed. It is
# for avoiding syntax problems rather than for formatting.
# Note2: The -mangle option causes large numbers of calls to this
# routine and therefore is a good test. So if a change is made, be sure
# to use nytprof to profile with both old and revised coding using the
# -mangle option and check differences.
# This is potentially a very slow routine but the following quick
# filters typically catch and handle over 90% of the calls.
# -qwaf phantom commas require space before type 'Q'
# See similar patch in sub set_whitespace_flags
if ( !$tokenl
&& $typel eq ','
&& $typer eq 'Q'
&& $rOpts_qw_as_function )
{
return 1;
}
# Filter 1: usually no space required after common types ; , [ ] { } ( )
return
if ( $essential_whitespace_filter_l1{$typel}
&& !$essential_whitespace_filter_r1{$typer} );
# Filter 2: usually no space before common types ; ,
return
if ( $essential_whitespace_filter_r2{$typer}
&& !$essential_whitespace_filter_l2{$typel} );
# Filter 3: Handle side comments: a space is only essential if the left
# token ends in '$' For example, we do not want to create $#foo below:
# sub t086
# ( #foo)))
# $ #foo)))
# a #foo)))
# ) #foo)))
# { ... }
# Also, I prefer not to put a ? and # together because ? used to be
# a pattern delimiter and spacing was used if guessing was needed.
if ( $typer eq '#' ) {
return 1
if ( $tokenl
&& ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
return;
}
my $tokenr_leading_ch = substr( $tokenr, 0, 1 );
my $tokenr_leading_ch2 = substr( $tokenr, 0, 2 );
my $tokenr_is_open_paren = $tokenr eq '(';
my $token_joined = $tokenl . $tokenr;
my $tokenl_is_dash = $tokenl eq '-';
my $tokenr_is_bareword = ord($tokenr_leading_ch) > ORD_PRINTABLE_MAX
# always correct but slow
? $tokenr =~ /^[^\d\W]/
# fast but ascii only
: ( $tokenr_leading_ch =~ tr/a-zA-Z_/a-zA-Z_/ );
#-------------------
# Must do full check
#-------------------
# This long logical expression gives the result
my $result =
# never combine two bare words or numbers
# examples: and ::ok(1)
# return ::spw(...)
# for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
# $input eq"quit" to make $inputeq"quit"
# my $size=-s::SINK if $file; <==OK but we won't do it
# don't join something like: for bla::bla:: abc
# example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
(
(
## ( $tokenr =~ /^([\'\w]|\:\:)/ )
$tokenr_is_bareword
|| $is_digit_char{$tokenr_leading_ch}
|| $tokenr_leading_ch eq "'"
|| $tokenr_leading_ch2 eq '::'
)
&& ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
)
# do not combine a number with a concatenation dot
# example: pom.caputo:
# $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
|| $typel eq 'n' && $tokenr eq '.'
|| $typer eq 'n' && $tokenl eq '.'
# cases of a space before a bareword...
|| (
$tokenr_is_bareword && (
# do not join a minus with a bare word, because you might form
# a file test operator. Example from Complex.pm:
# if (CORE::abs($z - i) < $eps);
# "z-i" would be taken as a file test.
$tokenl_is_dash && length($tokenr) == 1
# and something like this could become ambiguous without space
# after the '-':
# use constant III=>1;
# $a = $b - III;
# and even this:
# $a = - III;
|| $tokenl_is_dash && $typer =~ /^[wC]$/
# keep space between types Q & and a bareword
|| $is_type_with_space_before_bareword{$typel}
# +-: binary plus and minus before a bareword could get
# converted into unary plus and minus on next pass through the
# tokenizer. This can lead to blinkers: cases b660 b670 b780
# b781 b787 b788 b790 So we keep a space unless the +/- clearly
# follows an operator
|| ( ( $typel eq '+' || $typel eq '-' )
&& $typell !~ /^[niC\)\}\]R]$/ )
# keep a space between a token ending in '$' and any word;
# this caused trouble: "die @$ if $@"
|| $typel eq 'i' && substr( $tokenl, -1, 1 ) eq '$'
# don't combine $$ or $# with any alphanumeric
# (testfile mangle.t with --mangle)
|| $tokenl eq '$$'
|| $tokenl eq '$#'
)
) ## end $tokenr_is_bareword
# OLD, not used
# '= -' should not become =- or you will get a warning
# about reversed -=
# || ($tokenr eq '-')
# do not join a bare word with a minus, like between 'Send' and
# '-recipients' here <<snippets/space3.in>>
# my $msg = new Fax::Send
# -recipients => $to,
# -data => $data;
# This is the safest thing to do. If we had the token to the right of
# the minus we could do a better check.
#
# And do not combine a bareword and a quote, like this:
# oops "Your login, $Bad_Login, is not valid";
# It can cause a syntax error if oops is a sub
|| $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
# perl is very fussy about spaces before <<; c419 part 1
|| $tokenr_leading_ch2 eq '<<' && $typel ne '{' && $typel ne ','
# avoid combining tokens to create new meanings. Example:
# $a+ +$b must not become $a++$b
|| ( $is_digraph{$token_joined} )
|| $is_trigraph{$token_joined}
# another example: do not combine these two &'s:
# allow_options & &OPT_EXECCGI
|| $is_digraph{ $tokenl . $tokenr_leading_ch }
# retain any space after possible filehandle
# (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
# but no space for '$ {' even if '$' is marked as type 'Z', issue c221
|| ( $typel eq 'Z' && !( $tokenl eq '$' && $tokenr eq '{' ) )
# Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
# space after type Y. Otherwise, it will get parsed as type 'Z' later
# and any space would have to be added back manually if desired.
|| $typel eq 'Y'
# Perl is sensitive to whitespace after the + here:
# $b = xvals $a + 0.1 * yvals $a;
|| $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
|| (
$tokenr_is_open_paren && (
# keep paren separate in 'use Foo::Bar ()'
( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
# OLD: keep any space between filehandle and paren:
# file mangle.t with --mangle:
# NEW: this test is no longer necessary here (moved above)
## || $typel eq 'Y'
# must have space between grep and left paren; "grep(" will fail
|| $is_sort_grep_map{$tokenl}
# don't stick numbers next to left parens, as in:
#use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
|| $typel eq 'n'
)
) ## end $tokenr_is_open_paren
# retain any space after here doc operator ( see hereerr.t)
# c419, part 2a: unless followed by '}' or ','. See also part 2b.
# or ; (git174)
|| $typel eq 'h' && $typer ne '}' && $typer ne ',' && $typer ne ';'
# Be careful with a space around ++ and --, to avoid ambiguity as to
# which token it applies
|| ( $typer eq 'pp' || $typer eq 'mm' ) && $tokenl !~ /^[\;\{\(\[]/
|| ( $typel eq '++' || $typel eq '--' )
&& $tokenr !~ /^[\;\}\)\]]/
# Need space after 'for my' or 'foreach my';
# for example, this will fail in older versions of Perl:
# foreach my$ft(@filetypes)...
|| ( $tokenl eq 'my'
&& $tokenr_leading_ch eq '$'
&& $is_for_foreach{$tokenll} )
# Keep space after $^ if needed to avoid forming a different
# special variable (issue c068). For example:
# my $aa = $^ ? "none" : "ok";
# The problem is that '$^?' is a valid special variable
|| ( $typel eq 'i'
&& length($tokenl) == 2
&& substr( $tokenl, 1, 1 ) eq '^'
&& $is_special_variable_char{$tokenr_leading_ch} )
# We must be sure that a space between a ? and a quoted string
# remains if the space before the ? remains. [Loca.pm, lockarea]
# ie,
# $b=join $comma ? ',' : ':', @_; # ok
# $b=join $comma?',' : ':', @_; # ok!
# $b=join $comma ?',' : ':', @_; # error!
# Not really required:
## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
# Space stacked labels...
# Not really required: Perl seems to accept non-spaced labels.
## || $typel eq 'J' && $typer eq 'J'
; # the value of this long logic sequence is the result we want
return $result;
} ## end sub is_essential_whitespace
} ## end closure is_essential_whitespace
{ ## begin closure new_secret_operator_whitespace
my %secret_operators;
my %is_leading_secret_token;
BEGIN {
# token lists for perl secret operators as compiled by Philippe Bruhat
# at: https://metacpan.org/module/perlsecret
%secret_operators = (
'Goatse' => [qw#= ( ) =#], #=( )=
'Venus1' => [qw#0 +#], # 0+
'Venus2' => [qw#+ 0#], # +0
'Enterprise' => [qw#) x ! !#], # ()x!!
'Kite1' => [qw#~ ~ <>#], # ~~<>
'Kite2' => [qw#~~ <>#], # ~~<>
'Winking Fat Comma' => [ ( ',', '=>' ) ], # ,=>
'Bang bang' => [qw#! !#], # !!
);
# The following operators and constants are not included because they
# are normally kept tight by perltidy:
# ~~ <~>
#
# Make a lookup table indexed by the first token of each operator:
# first token => [list, list, ...]
foreach my $value ( values(%secret_operators) ) {
my $tok = $value->[0];
push @{ $is_leading_secret_token{$tok} }, $value;
}
} ## end BEGIN
sub new_secret_operator_whitespace {
my ( $self, $rwhitespace_flags ) = @_;
# Implement --tight-secret-operators
# Given:
# $rwhitespace_flags = whitespase flags, to be updated
# Loop over all tokens in this line
my $rLL = $self->[_rLL_];
my $jmax = @{$rLL} - 1;
foreach my $j ( 0 .. $jmax ) {
# Skip unless this token might start a secret operator
my $type = $rLL->[$j]->[_TYPE_];
next if ( $type eq 'b' );
my $token = $rLL->[$j]->[_TOKEN_];
next unless ( $is_leading_secret_token{$token} );
# Loop over all secret operators with this leading token
foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
my $jend = $j - 1;
foreach my $tok ( @{$rpattern} ) {
$jend++;
$jend++
if ( $jend <= $jmax
&& $rLL->[$jend]->[_TYPE_] eq 'b' );
if ( $jend > $jmax
|| $tok ne $rLL->[$jend]->[_TOKEN_] )
{
$jend = undef;
last;
}
}
if ($jend) {
# set flags to prevent spaces within this operator
foreach my $jj ( $j + 1 .. $jend ) {
$rwhitespace_flags->[$jj] = WS_NO;
}
$j = $jend;
last;
}
} ## End Loop over all operators
} ## End loop over all tokens
return;
} ## end sub new_secret_operator_whitespace
} ## end closure new_secret_operator_whitespace
{ ## begin closure set_bond_strengths
# These routines and variables are involved in deciding where to break very
# long lines.
# NEW_TOKENS must add bond strength rules
my %is_good_keyword_breakpoint;
my %is_container_token;
my %binary_bond_strength_nospace;
my %binary_bond_strength;
my %nobreak_lhs;
my %nobreak_rhs;
my @bias_tokens;
my %bias_hash;
my %bias;
my $delta_bias;
sub initialize_bond_strength_hashes {
my @q;
@q = qw( if unless while until for foreach );
@is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
@q = qw/ ( [ { } ] ) /;
@is_container_token{@q} = (1) x scalar(@q);
# The decision about where to break a line depends upon a "bond
# strength" between tokens. The LOWER the bond strength, the MORE
# likely a break. A bond strength may be any value but to simplify
# things there are several pre-defined strength levels:
# NO_BREAK => 10000;
# VERY_STRONG => 100;
# STRONG => 2.1;
# NOMINAL => 1.1;
# WEAK => 0.8;
# VERY_WEAK => 0.55;
# The strength values are based on trial-and-error, and need to be
# tweaked occasionally to get desired results. Some comments:
#
# 1. Only relative strengths are important. small differences
# in strengths can make big formatting differences.
# 2. Each indentation level adds one unit of bond strength.
# 3. A value of NO_BREAK makes an unbreakable bond
# 4. A value of VERY_WEAK is the strength of a ','
# 5. Values below NOMINAL are considered ok break points.
# 6. Values above NOMINAL are considered poor break points.
#
# The bond strengths should roughly follow precedence order where
# possible. If you make changes, please check the results very
# carefully on a variety of scripts. Testing with the -extrude
# options is particularly helpful in exercising all of the rules.
# Wherever possible, bond strengths are defined in the following
# tables. There are two main stages to setting bond strengths and
# two types of tables:
#
# The first stage involves looking at each token individually and
# defining left and right bond strengths, according to if we want
# to break to the left or right side, and how good a break point it
# is. For example tokens like =, ||, && make good break points and
# will have low strengths, but one might want to break on either
# side to put them at the end of one line or beginning of the next.
#
# The second stage involves looking at certain pairs of tokens and
# defining a bond strength for that particular pair. This second
# stage has priority.
#---------------------------------------------------------------
# Bond Strength BEGIN Section 1.
# Set left and right bond strengths of individual tokens.
#---------------------------------------------------------------
# NOTE: NO_BREAK's set in this section first are HINTS which will
# probably not be honored. Essential NO_BREAKS's should be set in
# BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
# of this subroutine.
# Note that we are setting defaults in this section. The user
# cannot change bond strengths but can cause the left and right
# bond strengths of any token type to be swapped through the use of
# the -wba and -wbb flags. In this way the user can determine if a
# breakpoint token should appear at the end of one line or the
# beginning of the next line.
%right_bond_strength = ();
%left_bond_strength = ();
%binary_bond_strength_nospace = ();
%binary_bond_strength = ();
%nobreak_lhs = ();
%nobreak_rhs = ();
# The hash keys in this section are token types, plus the text of
# certain keywords like 'or', 'and'.
# no break around possible filehandle
$left_bond_strength{'Z'} = NO_BREAK;
$right_bond_strength{'Z'} = NO_BREAK;
# never put a bare word on a new line:
# example print (STDERR, "bla"); will fail with break after (
$left_bond_strength{'w'} = NO_BREAK;
# blanks always have infinite strength to force breaks after
# real tokens
$right_bond_strength{'b'} = NO_BREAK;
# try not to break on exponentiation
@q = qw# ** .. ... <=> #;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# The comma-arrow has very low precedence but not a good break point
$left_bond_strength{'=>'} = NO_BREAK;
$right_bond_strength{'=>'} = NOMINAL;
# ok to break after label
$left_bond_strength{'J'} = NO_BREAK;
$right_bond_strength{'J'} = NOMINAL;
$left_bond_strength{'j'} = STRONG;
$right_bond_strength{'j'} = STRONG;
$left_bond_strength{'A'} = STRONG;
$right_bond_strength{'A'} = STRONG;
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
$left_bond_strength{'CORE::'} = NOMINAL;
$right_bond_strength{'CORE::'} = NO_BREAK;
# Fix for c250: added strengths for new type 'P'
# Note: these are working okay, but may eventually need to be
# adjusted or even removed.
$left_bond_strength{'P'} = NOMINAL;
$right_bond_strength{'P'} = NOMINAL;
# breaking AFTER modulus operator is ok:
@q = qw< % >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
# Break AFTER math operators * and /
@q = qw< * / x >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# Break AFTER weakest math operators + and -
# Make them weaker than * but a bit stronger than '.'
@q = qw< + - >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
# Define left strength of unary plus and minus (fixes case b511)
$left_bond_strength{p} = $left_bond_strength{'+'};
$left_bond_strength{m} = $left_bond_strength{'-'};
# And make right strength of unary plus and minus very high.
# Fixes cases b670 b790
$right_bond_strength{p} = NO_BREAK;
$right_bond_strength{m} = NO_BREAK;
# breaking BEFORE these is just ok:
@q = qw# >> << #;
@right_bond_strength{@q} = (STRONG) x scalar(@q);
@left_bond_strength{@q} = (NOMINAL) x scalar(@q);
# breaking before the string concatenation operator seems best
# because it can be hard to see at the end of a line
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
@q = qw< } ] ) R >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
# make these a little weaker than nominal so that they get
# favored for end-of-line characters
@q = qw< != == =~ !~ ~~ !~~ >;
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
# break AFTER these
@q = qw# < > | & >= <= #;
@left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
# breaking either before or after a quote is ok
# but bias for breaking before a quote
$left_bond_strength{'Q'} = NOMINAL;
$right_bond_strength{'Q'} = NOMINAL + 0.02;
$left_bond_strength{'q'} = NOMINAL;
$right_bond_strength{'q'} = NOMINAL;
# starting a line with a keyword is usually ok
$left_bond_strength{'k'} = NOMINAL;
# we usually want to bond a keyword strongly to what immediately
# follows, rather than leaving it stranded at the end of a line
$right_bond_strength{'k'} = STRONG;
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
# assignment operators
@q = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= );
# Default is to break AFTER various assignment operators
@left_bond_strength{@q} = (STRONG) x scalar(@q);
@right_bond_strength{@q} =
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
# Default is to break BEFORE '&&' and '||' and '//'
# set strength of '||' to same as '=' so that chains like
# $a = $b || $c || $d will break before the first '||'
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{'||'} = $right_bond_strength{'='};
# same thing for '//'
$right_bond_strength{'//'} = NOMINAL;
$left_bond_strength{'//'} = $right_bond_strength{'='};
# set strength of && a little higher than ||
$right_bond_strength{'&&'} = NOMINAL;
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
# set strength of ^^ between && and ||. See git157.
# "1 || 0 ^^ 0 || 1" = true, so ^^ is stronger than ||
# "1 ^^ 1 && 0" = true, so && is stronger than ^^
$right_bond_strength{'^^'} = NOMINAL;
$left_bond_strength{'^^'} = $left_bond_strength{'||'} + 0.05;
$left_bond_strength{';'} = VERY_STRONG;
$right_bond_strength{';'} = VERY_WEAK;
$left_bond_strength{'f'} = VERY_STRONG;
# make right strength of for ';' a little less than '='
# to make for contents break after the ';' to avoid this:
# for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
# $number_of_fields )
# and make it weaker than ',' and 'and' too
$right_bond_strength{'f'} = VERY_WEAK - 0.03;
# The strengths of ?/: should be somewhere between
# an '=' and a quote (NOMINAL),
# make strength of ':' slightly less than '?' to help
# break long chains of ? : after the colons
$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
$right_bond_strength{':'} = NO_BREAK;
$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
$right_bond_strength{'?'} = NO_BREAK;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
# remaining digraphs and trigraphs not defined above
@q = qw( :: <> ++ -- );
@left_bond_strength{@q} = (WEAK) x scalar(@q);
@right_bond_strength{@q} = (STRONG) x scalar(@q);
# Set bond strengths of certain keywords
# make 'or', 'err', 'and' slightly weaker than a ','
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
$left_bond_strength{'err'} = VERY_WEAK - 0.02;
$left_bond_strength{'xor'} = VERY_WEAK - 0.01;
@q = qw( ne eq );
@left_bond_strength{@q} = (NOMINAL) x scalar(@q);
@q = qw( lt gt le ge );
@left_bond_strength{@q} = ( 0.9 * NOMINAL + 0.1 * STRONG ) x scalar(@q);
@q = qw( and or err xor ne eq );
@right_bond_strength{@q} = (NOMINAL) x scalar(@q);
$right_bond_strength{'{'} = WEAK;
$left_bond_strength{'{'} = VERY_STRONG;
#---------------------------------------------------------------
# Bond Strength BEGIN Section 2.
# Set binary rules for bond strengths between certain token types.
#---------------------------------------------------------------
# We have a little problem making tables which apply to the
# container tokens. Here is a list of container tokens and
# their types:
#
# type tokens // meaning
# { {, [, ( // indent
# } }, ], ) // outdent
# [ [ // left non-structural [ (enclosing an array index)
# ] ] // right non-structural square bracket
# ( ( // left non-structural paren
# ) ) // right non-structural paren
# L { // left non-structural curly brace (enclosing a key)
# R } // right non-structural curly brace
#
# Some rules apply to token types and some to just the token
# itself. We solve the problem by combining type and token into a
# new hash key for the container types.
#
# If a rule applies to a token 'type' then we need to make rules
# for each of these 'type.token' combinations:
# Type Type.Token
# { {{, {[, {(
# [ [[
# ( ((
# L L{
# } }}, }], })
# ] ]]
# ) ))
# R R}
#
# If a rule applies to a token then we need to make rules for
# these 'type.token' combinations:
# Token Type.Token
# { {{, L{
# [ {[, [[
# ( {(, ((
# } }}, R}
# ] }], ]]
# ) }), ))
# allow long lines before final { in an if statement, as in:
# if (..........
# ..........)
# {
#
# Otherwise, the line before the { tends to be too short.
$binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
$binary_bond_strength{'(('}{'{{'} = NOMINAL;
# break on something like '} (', but keep this stronger than a ','
# example is in 'howe.pl'
$binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
$binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
# keep matrix and hash indices together
# but make them a little below STRONG to allow breaking open
# something like {'some-word'}{'some-very-long-word'} at the }{
# (bracebrk.t)
$binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
$binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
# increase strength to the point where a break in the following
# will be after the opening paren rather than at the arrow:
# $a->$b($c);
$binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
# Added for c140 to make 'w ->' and 'i ->' behave the same
$binary_bond_strength{'w'}{'->'} = 1.45 * STRONG;
# Note that the following alternative strength would make the break at
# the '->' rather than opening the '('. Both have advantages and
# disadvantages.
# $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
$binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
$binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
$binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
#---------------------------------------------------------------
# Binary NO_BREAK rules
#---------------------------------------------------------------
# use strict requires that bare word and => not be separated
$binary_bond_strength{'C'}{'=>'} = NO_BREAK;
$binary_bond_strength{'U'}{'=>'} = NO_BREAK;
# Never break between a bareword and a following paren because
# perl may give an error. For example, if a break is placed
# between 'to_filehandle' and its '(' the following line will
# give a syntax error [Carp.pm]: my( $no) =fileno(
# to_filehandle( $in)) ;
$binary_bond_strength{'C'}{'(('} = NO_BREAK;
$binary_bond_strength{'C'}{'{('} = NO_BREAK;
$binary_bond_strength{'U'}{'(('} = NO_BREAK;
$binary_bond_strength{'U'}{'{('} = NO_BREAK;
# use strict requires that bare word within braces not start new
# line
$binary_bond_strength{'L{'}{'w'} = NO_BREAK;
$binary_bond_strength{'w'}{'R}'} = NO_BREAK;
# The following two rules prevent a syntax error caused by breaking up
# a construction like '{-y}'. The '-' quotes the 'y' and prevents
# it from being taken as a transliteration. We have to keep
# token types 'L m w' together to prevent this error.
$binary_bond_strength{'L{'}{'m'} = NO_BREAK;
$binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
# keep 'bareword-' together, but only if there is no space between
# the word and dash. Do not keep together if there is a space.
# example 'use perl6-alpha'
$binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
# use strict requires that bare word and => not be separated
$binary_bond_strength{'w'}{'=>'} = NO_BREAK;
# use strict does not allow separating type info from trailing { }
# testfile is readmail.pl
$binary_bond_strength{'t'}{'L{'} = NO_BREAK;
$binary_bond_strength{'i'}{'L{'} = NO_BREAK;
# Fix for c250: set strength for new 'S' to be same as 'i'
# testfile is test11/Hub.pm
$binary_bond_strength{'S'}{'L{'} = NO_BREAK;
# As a defensive measure, do not break between a '(' and a
# filehandle. In some cases, this can cause an error. For
# example, the following program works:
# my $msg="hi!\n";
# print
# ( STDOUT
# $msg
# );
#
# But this program fails:
# my $msg="hi!\n";
# print
# (
# STDOUT
# $msg
# );
#
# This is normally only a problem with the 'extrude' option
$binary_bond_strength{'(('}{'Y'} = NO_BREAK;
$binary_bond_strength{'{('}{'Y'} = NO_BREAK;
# never break between sub name and opening paren
$binary_bond_strength{'w'}{'(('} = NO_BREAK;
$binary_bond_strength{'w'}{'{('} = NO_BREAK;
# keep '}' together with ';'
$binary_bond_strength{'}}'}{';'} = NO_BREAK;
# Breaking before a ++ can cause perl to guess wrong. For
# example the following line will cause a syntax error
# with -extrude if we break between '$i' and '++' [fixstyle2]
# print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
$nobreak_lhs{'++'} = NO_BREAK;
# Do not break before a possible file handle
$nobreak_lhs{'Z'} = NO_BREAK;
# use strict hates bare words on any new line. For
# example, a break before the underscore here provokes the
# wrath of use strict:
# if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
$nobreak_rhs{'F'} = NO_BREAK;
$nobreak_rhs{'CORE::'} = NO_BREAK;
# To prevent the tokenizer from switching between types 'w' and 'G' we
# need to avoid breaking between type 'G' and the following code block
# brace. Fixes case b929.
$nobreak_rhs{G} = NO_BREAK;
#---------------------------------------------------------------
# Bond Strength BEGIN Section 3.
# Define tables and values for applying a small bias to the above
# values.
#---------------------------------------------------------------
# Adding a small 'bias' to strengths is a simple way to make a line
# break at the first of a sequence of identical terms. For
# example, to force long string of conditional operators to break
# with each line ending in a ':', we can add a small number to the
# bond strength of each ':' (colon.t)
@bias_tokens = qw( : && || f and or . ); # tokens which get bias
%bias_hash = map { $_ => 0 } @bias_tokens;
$delta_bias = 0.0001; # a very small strength level
return;
} ## end sub initialize_bond_strength_hashes
use constant DEBUG_BOND => 0;
sub set_bond_strengths {
my ($self) = @_;
# Define a 'bond strength' for each token pair in an output batch.
# See comments above for definition of bond strength.
my $rbond_strength_to_go = [];
my $rLL = $self->[_rLL_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
# patch-its always ok to break at end of line
$nobreak_to_go[$max_index_to_go] = 0;
# we start a new set of bias values for each line
%bias = %bias_hash;
my $code_bias = -.01; # bias for closing block braces
my $type = 'b';
my $token = SPACE;
my $token_length = 1;
my $last_type;
my $last_nonblank_type = $type;
my $last_nonblank_token = $token;
my $list_str = $left_bond_strength{'?'};
my ( $bond_str_1, $bond_str_2, $bond_str_3, $bond_str_4 );
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type,
$total_nesting_depth );
# main loop to compute bond strengths between each pair of tokens
foreach my $i ( 0 .. $max_index_to_go ) {
$last_type = $type;
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
$type = $types_to_go[$i];
# strength on both sides of a blank is the same
if ( $type eq 'b' && $last_type ne 'b' ) {
$rbond_strength_to_go->[$i] = $rbond_strength_to_go->[ $i - 1 ];
$nobreak_to_go[$i] ||= $nobreak_to_go[ $i - 1 ]; # fix for b1257
next;
}
$token = $tokens_to_go[$i];
$token_length = $token_lengths_to_go[$i];
$block_type = $block_type_to_go[$i];
$i_next = $i + 1;
$next_type = $types_to_go[$i_next];
$next_token = $tokens_to_go[$i_next];
$total_nesting_depth = $nesting_depth_to_go[$i_next];
$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $seqno = $type_sequence_to_go[$i];
my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
# We are computing the strength of the bond between the current
# token and the NEXT token.
#---------------------------------------------------------------
# Bond Strength Section 1:
# First Approximation.
# Use minimum of individual left and right tabulated bond
# strengths.
#---------------------------------------------------------------
my $bsr = $right_bond_strength{$type};
my $bsl = $left_bond_strength{$next_nonblank_type};
# define right bond strengths of certain keywords
if ( $type eq 'k' ) {
if ( defined( $right_bond_strength{$token} ) ) {
$bsr = $right_bond_strength{$token};
}
}
# set terminal bond strength to the nominal value
# this will cause good preceding breaks to be retained
if ( $i_next_nonblank > $max_index_to_go ) {
$bsl = NOMINAL;
# But weaken the bond at a 'missing terminal comma'. If an
# optional comma is missing at the end of a broken list, use
# the strength of a comma anyway to make formatting the same as
# if it were there. Fixes issue c133.
if ( !defined($bsr) || $bsr > VERY_WEAK ) {
my $seqno_px = $parent_seqno_to_go[$max_index_to_go];
if ( $ris_list_by_seqno->{$seqno_px} ) {
my $KK = $K_to_go[$max_index_to_go];
my $Kn = $self->K_next_nonblank($KK);
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
if ( $seqno_n && $seqno_n eq $seqno_px ) {
$bsl = VERY_WEAK;
}
}
}
}
# define left bond strengths of certain keywords
if ( $next_nonblank_type eq 'k' ) {
if ( defined( $left_bond_strength{$next_nonblank_token} ) ) {
$bsl = $left_bond_strength{$next_nonblank_token};
}
}
# Use the minimum of the left and right strengths. Note: it might
# seem that we would want to keep a NO_BREAK if either token has
# this value. This didn't work, for example because in an arrow
# list, it prevents the comma from separating from the following
# bare word (which is probably quoted by its arrow). So necessary
# NO_BREAK's have to be handled as special cases in the final
# section.
if ( !defined($bsr) ) { $bsr = VERY_STRONG }
if ( !defined($bsl) ) { $bsl = VERY_STRONG }
my $bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
$bond_str_1 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 2:
# Apply hardwired rules..
#---------------------------------------------------------------
# Patch to put terminal or clauses on a new line: Weaken the bond
# at an || followed by die or similar keyword to make the terminal
# or clause fall on a new line, like this:
#
# my $class = shift
# || die "Cannot add broadcast: No class identifier found";
#
# Otherwise the break will be at the previous '=' since the || and
# = have the same starting strength and the or is biased, like
# this:
#
# my $class =
# shift || die "Cannot add broadcast: No class identifier found";
#
# In any case if the user places a break at either the = or the ||
# it should remain there.
if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
# /^(die|confess|croak|warn)$/
if ( $is_die_confess_croak_warn{$next_nonblank_token} ) {
if ( $want_break_before{$token} && $i > 0 ) {
$rbond_strength_to_go->[ $i - 1 ] -= $delta_bias;
# keep bond strength of a token and its following blank
# the same
if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
$rbond_strength_to_go->[ $i - 2 ] -= $delta_bias;
}
}
else {
$bond_str -= $delta_bias;
}
}
}
# good to break after end of code blocks
if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
}
if ( $type eq 'k' ) {
# allow certain control keywords to stand out
if ( $next_nonblank_type eq 'k'
&& $is_last_next_redo_return{$token} )
{
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
# Don't break after keyword my. This is a quick fix for a
# rare problem with perl. An example is this line from file
# Container.pm:
# foreach my $question( Debian::DebConf::ConfigDb::gettree(
# $this->{'question'} ) )
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
}
}
if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
if ( $is_keyword_returning_list{$next_nonblank_token} ) {
$bond_str = $list_str if ( $bond_str > $list_str );
}
# keywords like 'unless', 'if', etc, within statements
# make good breaks
if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
$bond_str = VERY_WEAK / 1.05;
}
}
# try not to break before a comma-arrow
elsif ( $next_nonblank_type eq '=>' ) {
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
else {
# no applicable hardwired change
}
#---------------------------------------------------------------
# Additional hardwired NOBREAK rules
#---------------------------------------------------------------
# map1.t -- correct for a quirk in perl
if ( $token eq '('
&& $next_nonblank_type eq 'i'
&& $last_nonblank_type eq 'k'
&& $is_sort_map_grep{$last_nonblank_token} )
# /^(sort|map|grep)$/ )
{
$bond_str = NO_BREAK;
}
# extrude.t: do not break before paren at:
# -l pid_filename(
if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
$bond_str = NO_BREAK;
}
# OLD COMMENT: In older version of perl, use strict can cause
# problems with breaks before bare words following opening parens.
# For example, this will fail under older versions if a break is
# made between '(' and 'MAIL':
# use strict; open( MAIL, "a long filename or command"); close MAIL;
# NEW COMMENT: Third fix for b1213:
# This option does not seem to be needed any longer, and it can
# cause instabilities. It can be turned off, but to minimize
# changes to existing formatting it is retained only in the case
# where the previous token was 'open' and there was no line break.
# Even this could eventually be removed if it causes instability.
if ( $type eq '{' ) {
if ( $token eq '('
&& $next_nonblank_type eq 'w'
&& $last_nonblank_type eq 'k'
&& $last_nonblank_token eq 'open'
&& !$old_breakpoint_to_go[$i] )
{
$bond_str = NO_BREAK;
}
}
# Do not break between a possible filehandle and a ? or / and do
# not introduce a break after it if there is no blank
# (extrude.t)
elsif ( $type eq 'Z' ) {
# don't break..
if (
# if there is no blank and we do not want one. Examples:
# print $x++ # do not break after $x
# print HTML"HELLO" # break ok after HTML
(
$next_type ne 'b'
&& defined( $want_left_space{$next_type} )
&& $want_left_space{$next_type} == WS_NO
)
# or we might be followed by the start of a quote,
# and this is not an existing breakpoint; fixes c039.
|| !$old_breakpoint_to_go[$i]
&& substr( $next_nonblank_token, 0, 1 ) eq '/'
)
{
$bond_str = NO_BREAK;
}
}
# Fix for c039
elsif ( $type eq 'w' ) {
$bond_str = NO_BREAK
if ( !$old_breakpoint_to_go[$i]
&& substr( $next_nonblank_token, 0, 1 ) eq '/'
&& $next_nonblank_type ne '//' );
}
else {
# no hardwired rule applies
}
# Breaking before a ? before a quote can cause trouble if
# they are not separated by a blank.
# Example: a syntax error occurs if you break before the ? here
# my$logic=join$all?' && ':' || ',@regexps;
# From: Professional_Perl_Programming_Code/multifind.pl
if ( $next_nonblank_type eq '?' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
# Breaking before a . followed by a number
# can cause trouble if there is no intervening space
# Example: a syntax error occurs if you break before the .2 here
# $str .= pack($endian.2, ensurrogate($ord));
# From: perl58/Unicode.pm
elsif ( $next_nonblank_type eq '.' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
# Do not break before a phantom comma because it will confuse
# the convergence test (STRANGE message is emitted)
elsif ( $next_nonblank_type eq ',' ) {
if ( !length($next_nonblank_token) ) {
$bond_str = NO_BREAK;
}
}
else {
# no special NO_BREAK rule applies
}
$bond_str_2 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# End of hardwired rules
#---------------------------------------------------------------
#---------------------------------------------------------------
# Bond Strength Section 3:
# Apply table rules. These have priority over the above
# hardwired rules.
#---------------------------------------------------------------
my $tabulated_bond_str;
my $ltype = $type;
my $rtype = $next_nonblank_type;
if ( $seqno && $is_container_token{$token} ) {
$ltype = $type . $token;
}
if ( $next_nonblank_seqno
&& $is_container_token{$next_nonblank_token} )
{
$rtype = $next_nonblank_type . $next_nonblank_token;
# Alternate Fix #1 for issue b1299. This version makes the
# decision as soon as possible. See Alternate Fix #2 also.
# Do not separate a bareword identifier from its paren: b1299
# This is currently needed for stability because if the bareword
# gets separated from a preceding '->' and following '(' then
# the tokenizer may switch from type 'i' to type 'w'. This
# patch will prevent this by keeping it adjacent to its '('.
## if ( $next_nonblank_token eq '('
## && $ltype eq 'i'
## && substr( $token, 0, 1 ) =~ /^\w$/ )
## {
## $ltype = 'w';
## }
}
# apply binary rules which apply regardless of space between tokens
if ( $binary_bond_strength{$ltype}{$rtype} ) {
$bond_str = $binary_bond_strength{$ltype}{$rtype};
$tabulated_bond_str = $bond_str;
}
# apply binary rules which apply only if no space between tokens
if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
$bond_str = $binary_bond_strength{$ltype}{$next_type};
$tabulated_bond_str = $bond_str;
}
if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
$bond_str = NO_BREAK;
$tabulated_bond_str = $bond_str;
}
$bond_str_3 = $bond_str if (DEBUG_BOND);
# If the hardwired rules conflict with the tabulated bond
# strength then there is an inconsistency that should be fixed
DEBUG_BOND
&& $tabulated_bond_str
&& $bond_str_1
&& $bond_str_1 != $bond_str_2
&& $bond_str_2 != $tabulated_bond_str
&& do {
print {*STDOUT}
"BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
};
#-----------------------------------------------------------------
# Bond Strength Section 4:
# Modify strengths of certain tokens which often occur in sequence
# by adding a small bias to each one in turn so that the breaks
# occur from left to right.
#
# Note that we only changing strengths by small amounts here,
# and usually increasing, so we should not be altering any NO_BREAKs.
# Other routines which check for NO_BREAKs will use a tolerance
# of one to avoid any problem.
#-----------------------------------------------------------------
# The bias tables use special keys:
# $type - if not keyword
# $token - if keyword, but map some keywords together
my $left_key =
$type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
my $right_key =
$next_nonblank_type eq 'k'
? $next_nonblank_token eq 'err'
? 'or'
: $next_nonblank_token
: $next_nonblank_type;
# bias left token
if ( defined( $bias{$left_key} ) ) {
if ( !$want_break_before{$left_key} ) {
$bias{$left_key} += $delta_bias;
$bond_str += $bias{$left_key};
}
}
# bias right token
if ( defined( $bias{$right_key} ) ) {
if ( $want_break_before{$right_key} ) {
# for leading '.' align all but 'short' quotes; the idea
# is to not place something like "\n" on a single line.
if ( $right_key eq '.' ) {
my $is_short_quote = $last_nonblank_type eq '.'
&& ( $token_length <=
$rOpts_short_concatenation_item_length )
&& !$is_closing_token{$token};
if ( !$is_short_quote ) {
$bias{$right_key} += $delta_bias;
}
}
else {
$bias{$right_key} += $delta_bias;
}
$bond_str += $bias{$right_key};
}
}
$bond_str_4 = $bond_str if (DEBUG_BOND);
#---------------------------------------------------------------
# Bond Strength Section 5:
# Fifth Approximation.
# Take nesting depth into account by adding the nesting depth
# to the bond strength.
#---------------------------------------------------------------
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
if ( $total_nesting_depth > 0 ) {
$strength = $bond_str + $total_nesting_depth;
}
else {
$strength = $bond_str;
}
}
else {
$strength = NO_BREAK;
# For critical code such as lines with here targets we must
# be absolutely sure that we do not allow a break. So for
# these the nobreak flag exceeds 1 as a signal. Otherwise we
# can run into trouble when small tolerances are added.
$strength += 1
if ( $nobreak_to_go[$i] && $nobreak_to_go[$i] > 1 );
}
#---------------------------------------------------------------
# Bond Strength Section 6:
# Sixth Approximation. Welds.
#---------------------------------------------------------------
# Do not allow a break within welds
if ( $total_weld_count && $seqno ) {
my $KK = $K_to_go[$i];
if ( $rK_weld_right->{$KK} ) {
$strength = NO_BREAK;
}
# But encourage breaking after opening welded tokens
elsif ($rK_weld_left->{$KK}
&& $is_opening_token{$token} )
{
$strength -= 1;
}
else {
# not welded left or right
}
}
# always break after side comment
if ( $type eq '#' ) { $strength = 0 }
$rbond_strength_to_go->[$i] = $strength;
# Fix for case c001: be sure NO_BREAK's are enforced by later
# routines, except at a '?' because '?' as quote delimiter is
# deprecated.
if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
$nobreak_to_go[$i] ||= 1;
}
DEBUG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= SPACE x ( 16 - length($str) );
print {*STDOUT}
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
# reset for next pass
$bond_str_1 = $bond_str_2 = $bond_str_3 = $bond_str_4 = undef;
};
} ## end main loop
return $rbond_strength_to_go;
} ## end sub set_bond_strengths
} ## end closure set_bond_strengths
sub bad_pattern {
my ($pattern) = @_;
# Return true if a regex pattern has an error
# Note: Tokenizer.pm also has a copy of this
my $regex_uu = eval { qr/$pattern/ };
return $EVAL_ERROR;
} ## end sub bad_pattern
{ ## begin closure prepare_cuddled_block_types
my %no_cuddle;
# Add keywords here which really should not be cuddled
BEGIN {
my @q = qw( if unless for foreach while );
@no_cuddle{@q} = (1) x scalar(@q);
}
sub prepare_cuddled_block_types {
# Construct a hash needed by the cuddled-else style
my $cuddled_string = EMPTY_STRING;
if ( $rOpts->{'cuddled-else'} ) {
# set the default
$cuddled_string = 'elsif else continue catch finally'
unless ( $rOpts->{'cuddled-block-list-exclusive'} );
# This is the old equivalent but more complex version
# $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
# Add users other blocks to be cuddled
my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
if ($cuddled_block_list) {
$cuddled_string .= SPACE . $cuddled_block_list;
}
}
# If we have a cuddled string of the form
# 'try-catch-finally'
# we want to prepare a hash of the form
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# use -dcbl to dump this hash
# Multiple such strings are input as a space or comma separated list
# If we get two lists with the same leading type, such as
# -cbl = "-try-catch-finally -try-catch-otherwise"
# then they will get merged as follows:
# $rcuddled_block_types = {
# 'try' => {
# 'catch' => 1,
# 'finally' => 2,
# 'otherwise' => 1,
# },
# };
# This will allow either type of chain to be followed.
$cuddled_string =~ s/,/ /g; # allow space or comma separated lists
my @cuddled_strings = split /\s+/, $cuddled_string;
$rcuddled_block_types = {};
# process each dash-separated string...
my $string_count = 0;
foreach my $string (@cuddled_strings) {
next unless $string;
my @words = split /-+/, $string; # allow multiple dashes
# we could look for and report possible errors here...
next if ( @words <= 0 );
# allow either '-continue' or *-continue' for arbitrary starting type
my $start = '*';
# a single word without dashes is a secondary block type
if ( @words > 1 ) {
$start = shift @words;
}
# always make an entry for the leading word. If none follow, this
# will still prevent a wildcard from matching this word.
if ( !defined( $rcuddled_block_types->{$start} ) ) {
$rcuddled_block_types->{$start} = {};
}
# The count gives the original word order in case we ever want it.
$string_count++;
my $word_count = 0;
foreach my $word (@words) {
next unless $word;
if ( $no_cuddle{$word} ) {
Warn(
"## Ignoring keyword '$word' in -cbl; does not seem right\n"
);
next;
}
$word_count++;
$rcuddled_block_types->{$start}->{$word} =
1; #"$string_count.$word_count";
# git#9: Remove this word from the list of desired one-line
# blocks
$want_one_line_block{$word} = 0;
}
}
return;
} ## end sub prepare_cuddled_block_types
} ## end closure prepare_cuddled_block_types
sub dump_cuddled_block_list {
my ($fh) = @_;
# ORIGINAL METHOD: Here is the format of the cuddled block type hash
# which controls this routine
# my $rcuddled_block_types = {
# 'if' => {
# 'else' => 1,
# 'elsif' => 1
# },
# 'try' => {
# 'catch' => 1,
# 'finally' => 1
# },
# };
# SIMPLIFIED METHOD: the simplified method uses a wildcard for
# the starting block type and puts all cuddled blocks together:
# my $rcuddled_block_types = {
# '*' => {
# 'else' => 1,
# 'elsif' => 1
# 'catch' => 1,
# 'finally' => 1
# },
# };
# Both methods work, but the simplified method has proven to be adequate and
# easier to manage.
my $cuddled_string = $rOpts->{'cuddled-block-list'};
$cuddled_string = EMPTY_STRING unless $cuddled_string;
my $flags = EMPTY_STRING;
$flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
$flags .= " -cbl='$cuddled_string'";
if ( !$rOpts->{'cuddled-else'} ) {
$flags .= "\nNote: You must specify -ce to generate a cuddled hash";
}
$fh->print(<<EOM);
------------------------------------------------------------------------
Hash of cuddled block types prepared for a run with these parameters:
$flags
------------------------------------------------------------------------
EOM
use Data::Dumper;
$fh->print( Dumper($rcuddled_block_types) );
$fh->print(<<EOM);
------------------------------------------------------------------------
EOM
return;
} ## end sub dump_cuddled_block_list
sub make_static_block_comment_pattern {
# create the pattern used to identify static block comments
$static_block_comment_pattern = '^\s*##';
# allow the user to change it
if ( $rOpts->{'static-block-comment-prefix'} ) {
my $prefix = $rOpts->{'static-block-comment-prefix'};
$prefix =~ s/^\s+//;
my $pattern = $prefix;
# user may give leading caret to force matching left comments only
if ( $prefix !~ /^\^#/ ) {
if ( $prefix !~ /^#/ ) {
Die(
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
);
}
$pattern = '^\s*' . $prefix;
}
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_block_comment_pattern = $pattern;
}
return;
} ## end sub make_static_block_comment_pattern
sub make_format_skipping_pattern {
my ( $opt_name, $default ) = @_;
my $param = $rOpts->{$opt_name};
if ( !$param ) { $param = $default }
$param =~ s/^\s+//;
if ( $param !~ /^#/ ) {
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
}
my $pattern = '^' . $param . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
);
}
return $pattern;
} ## end sub make_format_skipping_pattern
sub make_non_indenting_brace_pattern {
# Create the pattern used to identify static side comments.
# Note that we are ending the pattern in a \s. This will allow
# the pattern to be followed by a space and some text, or a newline.
# The pattern is used in sub 'non_indenting_braces'
$non_indenting_brace_pattern = '^#<<<\s';
# allow the user to change it
if ( $rOpts->{'non-indenting-brace-prefix'} ) {
my $prefix = $rOpts->{'non-indenting-brace-prefix'};
$prefix =~ s/^\s+//;
if ( $prefix !~ /^#/ ) {
Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
}
my $pattern = '^' . $prefix . '\s';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$non_indenting_brace_pattern = $pattern;
}
return;
} ## end sub make_non_indenting_brace_pattern
sub make_closing_side_comment_list_pattern {
# turn any input list into a regex for recognizing selected block types
$closing_side_comment_list_pattern = '^\w+';
# '1' is an impossible block name
$closing_side_comment_exclusion_pattern = '^1';
# Need a separate flag for anonymous subs because they are the only
# types where the side comment might follow a ';'
$closing_side_comment_want_asub = 1;
my $cscl = $rOpts->{'closing-side-comment-list'};
if ( defined($cscl) && $cscl ) {
$closing_side_comment_list_pattern =
make_block_pattern( '-cscl', $cscl );
$closing_side_comment_want_asub = $cscl =~ /\basub\b/;
}
my $cscxl = $rOpts->{'closing-side-comment-exclusion-list'};
if ( defined($cscxl) && $cscxl ) {
$closing_side_comment_exclusion_pattern =
make_block_pattern( '-cscxl', $cscxl );
if ( $cscxl =~ /\basub\b/ ) {
$closing_side_comment_want_asub = 0;
}
}
return;
} ## end sub make_closing_side_comment_list_pattern
sub initialize_closing_side_comments {
make_closing_side_comment_prefix();
make_closing_side_comment_list_pattern();
# If closing side comments ARE selected, then we can safely
# delete old closing side comments unless closing side comment
# warnings are requested. This is a good idea because it will
# eliminate any old csc's which fall below the line count threshold.
# We cannot do this if warnings are turned on, though, because we
# might delete some text which has been added. So that must
# be handled when comments are created. And we cannot do this
# with -io because -csc will be skipped altogether.
if ( $rOpts->{'closing-side-comments'} ) {
if ( !$rOpts->{'closing-side-comment-warnings'}
&& !$rOpts->{'indent-only'} )
{
$rOpts->{'delete-closing-side-comments'} = 1;
}
}
# If closing side comments ARE NOT selected, but warnings ARE
# selected and we ARE DELETING csc's, then we will pretend to be
# adding with a huge interval. This will force the comments to be
# generated for comparison with the old comments, but not added.
elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
if ( $rOpts->{'delete-closing-side-comments'} ) {
$rOpts->{'delete-closing-side-comments'} = 0;
$rOpts->{'closing-side-comments'} = 1;
$rOpts->{'closing-side-comment-interval'} = 100_000_000;
}
}
else {
# no -csc flags
}
return;
} ## end sub initialize_closing_side_comments
sub initialize_missing_else_comment {
my $comment = $rOpts->{'add-missing-else-comment'};
if ( !$comment ) {
$comment = '##FIX' . 'ME - added with perltidy -ame';
}
else {
$comment = substr( $comment, 0, 60 );
$comment =~ s/^\s+//;
$comment =~ s/\s+$//;
$comment =~ s/\n/ /g;
if ( substr( $comment, 0, 1 ) ne '#' ) {
$comment = '#' . $comment;
}
}
$rOpts->{'add-missing-else-comment'} = $comment;
return;
} ## end sub initialize_missing_else_comment
sub make_sub_matching_pattern {
# Patterns for standardizing matches to block types for regular subs and
# anonymous subs. Examples
# 'sub process' is a named sub
# 'sub ::m' is a named sub
# 'sub' is an anonymous sub
# 'sub:' is a label, not a sub
# 'sub :' is a label, not a sub ( block type will be <sub:> )
# sub'_ is a named sub ( block type will be <sub '_> )
# 'substr' is a keyword
# So note that named subs always have a space after 'sub'
$SUB_PATTERN = '^sub\s'; # match normal sub
$ASUB_PATTERN = '^sub$'; # match anonymous sub
%matches_ASUB = ( 'sub' => 1 );
# Fix the patterns to include any sub aliases:
# Note that any 'sub-alias-list' has been preprocessed to
# be a trimmed, space-separated list which includes 'sub'
# for example, it might be 'sub method fun'
my @words;
my $sub_alias_list = $rOpts->{'sub-alias-list'};
if ($sub_alias_list) {
@words = split /\s+/, $sub_alias_list;
}
else {
push @words, 'sub';
}
# add 'method' unless use-feature='noclass' is set.
if ( !defined( $rOpts->{'use-feature'} )
|| $rOpts->{'use-feature'} !~ /\bnoclass\b/ )
{
push @words, 'method';
}
# Note (see also RT #133130): These patterns are used by
# sub make_block_pattern, which is used for making most patterns.
# So this sub needs to be called before other pattern-making routines.
if ( @words > 1 ) {
# Two ways are provided to match an anonymous sub:
# $ASUB_PATTERN - with a regex (old method, slow)
# %matches_ASUB - with a hash lookup (new method, faster)
@matches_ASUB{@words} = (1) x scalar(@words);
my $alias_list = join '|', keys %matches_ASUB;
$SUB_PATTERN =~ s/sub/\($alias_list\)/;
$ASUB_PATTERN =~ s/sub/\($alias_list\)/;
}
return;
} ## end sub make_sub_matching_pattern
sub make_bl_pattern {
# Set defaults lists to retain historical default behavior for -bl:
my $bl_list_string = '*';
my $bl_exclusion_list_string = 'sort map grep eval asub';
my $bl_long_name = 'opening-brace-on-new-line';
my $bll_long_name = 'brace-left-list';
my $blxl_long_name = 'brace-left-exclusion-list';
my $sbl_long_name = 'opening-sub-brace-on-new-line';
my $asbl_long_name = 'opening-anonymous-sub-brace-on-new-line';
if ( defined( $rOpts->{$bll_long_name} ) && $rOpts->{$bll_long_name} ) {
$bl_list_string = $rOpts->{$bll_long_name};
}
if ( $bl_list_string =~ /\bsub\b/ ) {
$rOpts->{$sbl_long_name} ||= $rOpts->{$bl_long_name};
}
if ( $bl_list_string =~ /\basub\b/ ) {
$rOpts->{$asbl_long_name} ||= $rOpts->{$bl_long_name};
}
$bl_pattern = make_block_pattern( '-bll', $bl_list_string );
# for -bl, a list with '*' turns on -sbl and -asbl
if ( $bl_pattern =~ /\.\*/ ) {
if ( !defined( $rOpts->{$sbl_long_name} ) ) {
$rOpts->{$sbl_long_name} = $rOpts->{$bl_long_name};
}
if ( !defined( $rOpts->{$asbl_long_name} )
&& defined( $rOpts->{$bll_long_name} ) )
{
$rOpts->{$asbl_long_name} = $rOpts->{$bl_long_name};
}
}
if ( defined( $rOpts->{$blxl_long_name} )
&& $rOpts->{$blxl_long_name} )
{
$bl_exclusion_list_string = $rOpts->{$blxl_long_name};
if ( $bl_exclusion_list_string =~ /\bsub\b/ ) {
$rOpts->{$sbl_long_name} = 0;
}
if ( $bl_exclusion_list_string =~ /\basub\b/ ) {
$rOpts->{$asbl_long_name} = 0;
}
}
$bl_exclusion_pattern =
make_block_pattern( '-blxl', $bl_exclusion_list_string );
return;
} ## end sub make_bl_pattern
sub make_bli_pattern {
# Default list of block types for which -bli would apply:
my $bli_list_string = 'if else elsif unless while for foreach do : sub';
my $bli_exclusion_list_string = SPACE;
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
&& $rOpts->{'brace-left-and-indent-list'} )
{
$bli_list_string = $rOpts->{'brace-left-and-indent-list'};
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
if ( defined( $rOpts->{'brace-left-and-indent-exclusion-list'} )
&& $rOpts->{'brace-left-and-indent-exclusion-list'} )
{
$bli_exclusion_list_string =
$rOpts->{'brace-left-and-indent-exclusion-list'};
}
$bli_exclusion_pattern =
make_block_pattern( '-blixl', $bli_exclusion_list_string );
return;
} ## end sub make_bli_pattern
sub make_keyword_group_list_pattern {
# Turn any input list into a regex for recognizing selected block types.
# Here are the defaults:
$keyword_group_list_pattern = '^(our|local|my|use|require|)$';
$keyword_group_list_comment_pattern = EMPTY_STRING;
if ( defined( $rOpts->{'keyword-group-blanks-list'} )
&& $rOpts->{'keyword-group-blanks-list'} )
{
my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
my @keyword_list;
my @comment_list;
foreach my $word (@words) {
if ( $word eq 'BC' || $word eq 'SBC' ) {
push @comment_list, $word;
if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
}
else {
push @keyword_list, $word;
}
}
$keyword_group_list_pattern =
make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
$keyword_group_list_comment_pattern =
make_block_pattern( '-kgbl', join( SPACE, @comment_list ) );
}
return;
} ## end sub make_keyword_group_list_pattern
sub make_block_brace_vertical_tightness_pattern {
# Turn any input list into a regex for recognizing selected block types
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
&& $rOpts->{'block-brace-vertical-tightness-list'} )
{
$block_brace_vertical_tightness_pattern =
make_block_pattern( '-bbvtl',
$rOpts->{'block-brace-vertical-tightness-list'} );
}
return;
} ## end sub make_block_brace_vertical_tightness_pattern
sub make_blank_line_pattern {
$blank_lines_before_closing_block_pattern = $SUB_PATTERN;
my $key = 'blank-lines-before-closing-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_before_closing_block_pattern =
make_block_pattern( '-blbcl', $rOpts->{$key} );
}
$blank_lines_after_opening_block_pattern = $SUB_PATTERN;
$key = 'blank-lines-after-opening-block-list';
if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
$blank_lines_after_opening_block_pattern =
make_block_pattern( '-blaol', $rOpts->{$key} );
}
return;
} ## end sub make_blank_line_pattern
sub make_block_pattern {
# Given a string of block-type keywords, return a regex to match them
# The only tricky part is that labels are indicated with a single ':'
# and the 'sub' token text may have additional text after it (name of
# sub).
#
# Example:
#
# input string: "if else elsif unless while for foreach do : sub";
# pattern: '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
# Minor Update:
#
# To distinguish between anonymous subs and named subs, use 'sub' to
# indicate a named sub, and 'asub' to indicate an anonymous sub
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
elsif ( $i eq 'asub' ) {
}
elsif ( $i eq ';' ) {
push @words, ';';
}
elsif ( $i eq '{' ) {
push @words, '\{';
}
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
elsif ( $i =~ /^\w/ ) {
push @words, $i;
}
else {
Warn("unrecognized block type $i after $abbrev, ignoring\n");
}
}
# Fix 2 for c091, prevent the pattern from matching an empty string
# '1 ' is an impossible block name.
if ( !@words ) { push @words, "1 " }
my $pattern = '(' . join( '|', @words ) . ')$';
my $sub_patterns = EMPTY_STRING;
if ( $seen{'sub'} ) {
$sub_patterns .= '|' . $SUB_PATTERN;
}
if ( $seen{'asub'} ) {
$sub_patterns .= '|' . $ASUB_PATTERN;
}
if ($sub_patterns) {
$pattern = '(' . $pattern . $sub_patterns . ')';
}
$pattern = '^' . $pattern;
return $pattern;
} ## end sub make_block_pattern
sub make_static_side_comment_pattern {
# Create the pattern used to identify static side comments
$static_side_comment_pattern = '^##';
# allow the user to change it
if ( $rOpts->{'static-side-comment-prefix'} ) {
my $prefix = $rOpts->{'static-side-comment-prefix'};
$prefix =~ s/^\s+//;
my $pattern = '^' . $prefix;
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
);
}
$static_side_comment_pattern = $pattern;
}
return;
} ## end sub make_static_side_comment_pattern
sub make_closing_side_comment_prefix {
# Be sure we have a valid closing side comment prefix
my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
my $csc_prefix_pattern;
if ( !defined($csc_prefix) ) {
$csc_prefix = '## end';
$csc_prefix_pattern = '^##\s+end';
}
else {
my $test_csc_prefix = $csc_prefix;
if ( $test_csc_prefix !~ /^#/ ) {
$test_csc_prefix = '#' . $test_csc_prefix;
}
# make a regex to recognize the prefix
my $test_csc_prefix_pattern = $test_csc_prefix;
# escape any special characters
$test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
$test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
# allow exact number of intermediate spaces to vary
$test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
# make sure we have a good pattern
# if we fail this we probably have an error in escaping
# characters.
if ( bad_pattern($test_csc_prefix_pattern) ) {
# shouldn't happen..must have screwed up escaping, above
if (DEVEL_MODE) {
Fault(<<EOM);
Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'
EOM
}
# just warn and keep going with defaults
Warn(
"Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
);
Warn("Please consider using a simpler -cscp prefix\n");
Warn("Using default -cscp instead; please check output\n");
}
else {
$csc_prefix = $test_csc_prefix;
$csc_prefix_pattern = $test_csc_prefix_pattern;
}
}
$rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
$closing_side_comment_prefix_pattern = $csc_prefix_pattern;
return;
} ## end sub make_closing_side_comment_prefix
sub initialize_keep_old_blank_lines_hash {
# Initialize the control hash for --keep-old-blank-lines-exceptions
%keep_old_blank_lines_exceptions = ();
my $long_name = 'keep-old-blank-lines-exceptions';
my $short_name = 'kblx';
my $opts = $rOpts->{$long_name};
return if ( !defined($opts) );
my @words = split_words($opts);
# Valid input types:
my %top;
my %bottom;
my @q = qw( }b {b cb );
@top{@q} = (1) x scalar(@q);
@q = qw( b{ b} bs bp bc );
@bottom{@q} = (1) x scalar(@q);
my @unknown_types;
# Table of translations to make thes closer to perltidy token types
# This must include all characters except 'b'
my %translate = (
'c' => '#',
's' => 'S',
'p' => 'P',
'}' => '}',
'{' => '{',
);
foreach my $str (@words) {
if ( $top{$str} ) {
my $tok = substr( $str, 0, 1 );
$tok = $translate{$tok};
if ( !defined($tok) ) {
## This can only happen if the input has introduced an new
## character which is not in the translation table
DEVEL_MODE && Fault("No top translation for $str\n");
next;
}
$keep_old_blank_lines_exceptions{top}->{$tok} = 1;
}
elsif ( $bottom{$str} ) {
my $tok = substr( $str, 1, 1 );
$tok = $translate{$tok};
if ( !defined($tok) ) {
## This can only happen if the input has introduced an new
## character which is not in the translation table
DEVEL_MODE && Fault("No bottom translation for $str\n");
next;
}
$keep_old_blank_lines_exceptions{bottom}->{$tok} = 1;
}
else {
push @unknown_types, $str;
}
if (@unknown_types) {
my $num = @unknown_types;
local $LIST_SEPARATOR = SPACE;
Warn(<<EOM);
$num unrecognized token types were input with --$short_name :
@unknown_types
EOM
}
}
return;
} ## end sub initialize_keep_old_blank_lines_hash
##################################################
# CODE SECTION 4: receive lines from the tokenizer
##################################################
{ ## begin closure write_line
my $nesting_depth;
# Variables used by sub check_sequence_numbers:
my $initial_seqno;
my $last_seqno;
my %saw_opening_seqno;
my %saw_closing_seqno;
# variables for the -qwaf option
my $in_qw_seqno;
my $in_qw_comma_count;
my $last_new_seqno;
my %new_seqno_from_old_seqno;
my $last_ending_in_quote;
my $added_seqno_count;
sub initialize_write_line {
$nesting_depth = undef;
$initial_seqno = undef;
$last_seqno = SEQ_ROOT;
$last_new_seqno = SEQ_ROOT;
%saw_opening_seqno = ();
%saw_closing_seqno = ();
$in_qw_seqno = 0;
$in_qw_comma_count = 0; # b1491
%new_seqno_from_old_seqno = ();
$last_ending_in_quote = 0;
$added_seqno_count = 0;
return;
} ## end sub initialize_write_line
sub check_sequence_numbers {
# Routine for checking sequence numbers. This only needs to be
# done occasionally in DEVEL_MODE to be sure everything is working
# correctly.
my ( $rtokens, $rtoken_type, $rtype_sequence, $input_line_no ) = @_;
my $jmax = @{$rtokens} - 1;
return if ( $jmax < 0 );
foreach my $j ( 0 .. $jmax ) {
my $seqno = $rtype_sequence->[$j];
my $token = $rtokens->[$j];
my $type = $rtoken_type->[$j];
$seqno = EMPTY_STRING unless ( defined($seqno) );
my $err_msg =
"Error at j=$j, line number $input_line_no, seqno='$seqno', type='$type', tok='$token':\n";
if ( !$seqno ) {
# Sequence numbers are generated for opening tokens, so every opening
# token should be sequenced. Closing tokens will be unsequenced
# if they do not have a matching opening token.
if ( $is_opening_sequence_token{$token}
&& $type ne 'q'
&& $type ne 'Q' )
{
Fault(
<<EOM
$err_msg Unexpected opening token without sequence number
EOM
);
}
}
else {
# Save starting seqno to identify sequence method:
# New method starts with 2 and has continuous numbering
# Old method (NOT USED) starts with >2 and may have gaps
if ( !defined($initial_seqno) ) {
$initial_seqno = $seqno;
# Be sure that sequence numbers start with 2. If not,
# there is a programming error in the tokenizer.
if ( $initial_seqno != 2 ) {
Fault(<<EOM);
Expecting initial sequence number of 2 but got '$initial_seqno'
EOM
}
# Be sure the root sequence number is 1. This is set
# as a constant at the top of this module.
if ( SEQ_ROOT != 1 ) {
my $SEQ_ROOT = SEQ_ROOT;
Fault(<<EOM);
The constant SEQ_ROOT has been changed from 1 to '$SEQ_ROOT'.
EOM
}
}
if ( $is_opening_sequence_token{$token} ) {
# New method should have continuous numbering
if ( $initial_seqno == 2 && $seqno != $last_seqno + 1 ) {
Fault(
<<EOM
$err_msg Unexpected opening sequence number: previous seqno=$last_seqno, but seqno= $seqno
EOM
);
}
$last_seqno = $seqno;
# Numbers must be unique
if ( $saw_opening_seqno{$seqno} ) {
my $lno = $saw_opening_seqno{$seqno};
Fault(
<<EOM
$err_msg Already saw an opening tokens at line $lno with this sequence number
EOM
);
}
$saw_opening_seqno{$seqno} = $input_line_no;
}
# only one closing item per seqno
elsif ( $is_closing_sequence_token{$token} ) {
if ( $saw_closing_seqno{$seqno} ) {
my $lno = $saw_closing_seqno{$seqno};
Fault(
<<EOM
$err_msg Already saw a closing token with this seqno at line $lno
EOM
);
}
$saw_closing_seqno{$seqno} = $input_line_no;
# Every closing seqno must have an opening seqno
if ( !$saw_opening_seqno{$seqno} ) {
Fault(
<<EOM
$err_msg Saw a closing token but no opening token with this seqno
EOM
);
}
}
# Sequenced items must be opening or closing
else {
Fault(
<<EOM
$err_msg Unexpected token type with a sequence number
EOM
);
}
}
}
return;
} ## end sub check_sequence_numbers
# hash keys which are common to old and new line_of_tokens
my @common_keys;
BEGIN {
@common_keys = qw(
_curly_brace_depth _ending_in_quote
_guessed_indentation_level _line_number
_line_text _line_type
_paren_depth _square_bracket_depth
_starting_in_quote
);
}
sub write_line {
my ( $self, $line_of_tokens_input ) = @_;
# This routine receives lines one-by-one from the tokenizer and stores
# them in a format suitable for further processing. After the last
# line has been sent, the tokenizer will call sub 'finish_formatting'
# to do the actual formatting.
# Given:
# $line_of_tokens_input = hash ref of one line from the tokenizer
my $rLL = $self->[_rLL_];
my $line_of_tokens = {};
# copy common hash key values
@{$line_of_tokens}{@common_keys} =
@{$line_of_tokens_input}{@common_keys};
my $line_type = $line_of_tokens_input->{_line_type};
my $tee_output;
my $Klimit = $self->[_Klimit_];
my ( $Kfirst, $Klast );
# Handle line of non-code
if ( $line_type ne 'CODE' ) {
$tee_output ||= $rOpts_tee_pod
&& substr( $line_type, 0, 3 ) eq 'POD';
$line_of_tokens->{_level_0} = 0;
$line_of_tokens->{_ci_level_0} = 0;
$line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
$line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
$line_of_tokens->{_ended_in_blank_token} = undef;
}
# Handle line of code
else {
my $rtokens = $line_of_tokens_input->{_rtokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax >= 0 ) {
$Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
#----------------------------
# get the tokens on this line
#----------------------------
$self->write_line_inner_loop( $line_of_tokens_input,
$line_of_tokens );
# update Klimit for added tokens
$Klimit = @{$rLL} - 1;
$Klast = $Klimit;
} ## end if ( $jmax >= 0 )
else {
# blank line
$line_of_tokens->{_level_0} = 0;
$line_of_tokens->{_ci_level_0} = 0;
$line_of_tokens->{_nesting_blocks_0} = EMPTY_STRING;
$line_of_tokens->{_nesting_tokens_0} = EMPTY_STRING;
$line_of_tokens->{_ended_in_blank_token} = undef;
}
$tee_output ||=
$rOpts_tee_block_comments
&& $jmax == 0
&& $rLL->[$Kfirst]->[_TYPE_] eq '#';
$tee_output ||=
$rOpts_tee_side_comments
&& defined($Kfirst)
&& $Klimit > $Kfirst
&& $rLL->[$Klimit]->[_TYPE_] eq '#';
} ## end if ( $line_type eq 'CODE')
# Finish storing line variables
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
$self->[_Klimit_] = $Klimit;
my $rlines = $self->[_rlines_];
push @{$rlines}, $line_of_tokens;
if ($tee_output) {
my $fh_tee = $self->[_fh_tee_];
my $line_text = $line_of_tokens_input->{_line_text};
$fh_tee->print($line_text) if ($fh_tee);
}
# We must use the old line because the qw logic may change this flag
$last_ending_in_quote = $line_of_tokens_input->{_ending_in_quote};
return;
} ## end sub write_line
sub qw_to_function {
my ( $self, $line_of_tokens, $is_ending_token ) = @_;
# This sub implements the -qwaf option:
# It is called for every type 'q' token which is part of a 'qw(' list.
# Essentially all of the coding for the '-qwaf' option is in this sub.
# Input parameters:
# $line_of_tokens = information hash for this line from the tokenizer,
# $is_ending_token = true if this qw does not extend to the next line
# Method:
# This qw token has already been pushed onto the output token stack, so
# we will pop it off and push on a sequence of tokens created by
# breaking it into an opening, a sequence of comma-separated quote
# items, and a closing paren. For multi-line qw quotes, there will be
# one call per input line until the end of the qw text is reached
# and processed.
# Note 1: A critical issue is to correctly generate and insert a new
# sequence number for the new parens into the sequence number stream.
# The new sequence number is the closure variable '$in_qw_seqno'. It
# is defined when the leading 'qw(' is seen, and is undefined when the
# closing ')' is output.
# Note 2: So far, no reason has been found to coordinate this logic
# with the logic which adds and deletes commas. We are adding trailing
# phantom commas here, except for a single list item, so no additional
# trailing comma should be added. And if a phantom trailing comma gets
# deleted, it should not matter because it does not get displayed.
my $rLL = $self->[_rLL_];
my $rSS = $self->[_rSS_];
my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
# Does this qw text spill over onto another line?
my $is_continued =
( $is_ending_token && $line_of_tokens->{_ending_in_quote} );
my $qw_text = $rLL->[-1]->[_TOKEN_];
my $qw_type = $rLL->[-1]->[_TYPE_];
my $qw_level = $rLL->[-1]->[_LEVEL_];
my $qw_text_start = $qw_text;
my $opening = EMPTY_STRING;
my $closing = EMPTY_STRING;
my $has_opening_space;
my $has_closing_space;
# the new word tokens are 1 level deeper than the original 'q' token
my $level_words = $qw_level + 1;
if ( $qw_type ne 'q' ) {
# This should never happen because the calling sub should have just
# pushed a token of type 'q' onto the token list.
my $lno = $line_of_tokens->{_line_number};
Fault("$lno: expecting type 'q' but got $qw_type");
return;
}
if ( !length($qw_text) ) {
# This seems to be an empty type 'q' token. A blank line within a
# qw quote is marked as a blank line rather than a blank 'q' token.
# So this should never happen.
my $lno = $line_of_tokens->{_line_number};
DEVEL_MODE && Fault("$lno: received empty type 'q' text\n");
return;
}
# remove leading 'qw(' if we are starting a new qw
if ( !$in_qw_seqno ) {
$opening = substr( $qw_text, 0, 3 );
if ( $opening ne 'qw(' ) {
# Caller should have checked this before calling
my $lno = $line_of_tokens->{_line_number};
DEVEL_MODE && Fault("$lno: unexpected qw opening: $opening\n");
return;
}
$qw_text = substr( $qw_text, 3 );
$has_opening_space = $qw_text =~ s/^\s+//;
# Do not use -qwaf under high stress (b1482,b1483,b1484,b1485,1486)
# Note: so far all known cases of stress instability have had -naws
# set, so this is included for now. It may eventually need to be
# removed.
# NOTE: The update for b1491 also fixes cases b1482-6 in a
# more general way, so this test can be deactivated.
if ( 0
&& !$rOpts_add_whitespace
&& $level_words >= $high_stress_level )
{
return;
}
}
# Look for and remove any closing ')'
if ( !$is_continued ) {
if ( length($qw_text) > 0 && substr( $qw_text, -1, 1 ) eq ')' ) {
$closing = substr( $qw_text, -1, 1 );
$qw_text = substr( $qw_text, 0, -1 );
$qw_text =~ s/\s+$//;
$has_closing_space = $qw_text =~ s/^\s+//;
}
else {
# We are at the end of a 'qw(' quote according to the
# tokenizer flag '_ending_in_quote', but there is no
# ending ')'. The '$is_continued' flag seems to be wrong.
my $lno = $line_of_tokens->{_line_number};
Fault(<<EOM);
qwaf inconsistency at input line $lno:
closing token is '$closing'
is_continued = $is_continued
EOM
return;
}
}
# Get any quoted words
my @words;
if ( length($qw_text) ) {
@words = split /\s+/, $qw_text;
}
# Be sure we have something left to output
if ( !$opening && !$closing && !@words ) {
my $lno = $line_of_tokens->{_line_number};
DEVEL_MODE && Fault(<<EOM);
Error parsing the following qw string at line $lno:
$qw_text_start
EOM
return;
}
# The combination -naws -lp can currently be unstable for multi-line qw
# (b1487, b1488).
# NOTE: this instability has been fixed by following the input
# whitespace within parens, but keep this code for a while in case the
# issue arises in the future (b1487).
if ( 0
&& !$rOpts_add_whitespace
&& $rOpts_line_up_parentheses
&& ( !$opening || !$closing ) )
{
return;
}
#---------------------------------------------------------------------
# This is the point of no return if the transformation has not started
#---------------------------------------------------------------------
# pop old type q token
my $rtoken_q = pop @{$rLL};
# now push on the replacement tokens
my $comma_count = 0;
if ($opening) {
# generate a new sequence number, one greater than the previous,
# and update a count for synchronization with the calling sub.
$in_qw_seqno = ++$last_new_seqno;
$added_seqno_count++;
my $seqno = $in_qw_seqno;
$self->[_ris_qwaf_by_seqno_]->{$seqno} = 1;
# update relevant seqno hashes
$rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
$nesting_depth++;
$self->[_rI_opening_]->[$seqno] = @{$rSS};
if ( $level_words > $self->[_maximum_level_] ) {
my $input_line_no = $line_of_tokens->{_line_number};
$self->[_maximum_level_] = $level_words;
$self->[_maximum_level_at_line_] = $input_line_no;
}
push @{$rSS}, $seqno;
# make and push the 'qw' token
my $rtoken_qw = copy_token_as_type( $rtoken_q, 'U', 'qw' );
push @{$rLL}, $rtoken_qw;
# make and push the '(' with the new sequence number
$self->[_K_opening_container_]->{$seqno} = @{$rLL};
my $rtoken_opening = copy_token_as_type( $rtoken_q, '{', '(' );
$rtoken_opening->[_TYPE_SEQUENCE_] = $seqno;
push @{$rLL}, $rtoken_opening;
}
# All words must be followed by a comma except for an intact
# structure with a single word, like 'qw(hello)'
my $commas_needed =
!( ( $opening || !$in_qw_comma_count ) && $closing && @words == 1 );
# Make and push each word as a type 'Q' quote followed by a phantom
# comma. The phantom comma is type ',' and is processed
# exactly like any other comma, but it has an empty string as the token
# text, so the line will display as a regular qw quote.
if (@words) {
foreach my $word (@words) {
# always space after a comma; follow input spacing after '('
if ( $comma_count || $has_opening_space ) {
my $rtoken_space =
copy_token_as_type( $rtoken_q, 'b', SPACE );
$rtoken_space->[_LEVEL_] = $level_words;
push @{$rLL}, $rtoken_space;
}
# this quoted text
my $rtoken_word = copy_token_as_type( $rtoken_q, 'Q', $word );
$rtoken_word->[_LEVEL_] = $level_words;
push @{$rLL}, $rtoken_word;
# Add a comma if needed. NOTE on trailing commas:
# - For multiple words: Trailing commas must be added.
# Otherwise, -atc might put a comma in a qw list.
# - For single words: Trailing commas are not required, and
# are best avoided. This is because:
# - atc will not add commas to a list which has no commas
# - This will make the single-item spacing rule work as
# expected.
# - This will reduce the chance of instability (b1491)
if ($commas_needed) {
my $rtoken_comma =
copy_token_as_type( $rtoken_q, ',', EMPTY_STRING );
$rtoken_comma->[_LEVEL_] = $level_words;
push @{$rLL}, $rtoken_comma;
$comma_count++;
}
}
}
# make and push closing sequenced item ')'
if ($closing) {
# OPTIONAL: remove a previous comma if it is the only one. This can
# happen if this closing paren starts a new line and there was just
# one word in the qw list. The reason for doing this would be
# to avoid possible instability, but none is currently known. b1491.
# This has been tested but is currently inactive because it has not
# been found to be necessary.
if ( 0
&& !@words
&& $in_qw_comma_count == 1
&& $rLL->[-1]->[_TYPE_] eq ',' )
{
# It is simpler to convert it to a blank; otherwise it would
# be necessary to change the range [Kfirst,Klast] of the
# previous line and the current line.
$rLL->[-1]->[_TYPE_] = 'b';
}
# Force paren tightness = 0 if closing paren follows a backslash
# c414, c424, and c446. for example:
# my @clock_chars = qw( | / - \ | / - \ );
my $iQ = $rLL->[-1]->[_TYPE_] eq 'Q' ? -1 : -2;
if ( substr( $rLL->[$iQ]->[_TOKEN_], -1, 1 ) eq BACKSLASH ) {
$self->[_rtightness_override_by_seqno_]->{$in_qw_seqno} = 0;
if ( !$rOpts_add_whitespace ) {
$rLL->[$iQ]->[_TOKEN_] .= SPACE;
}
}
# follow input spacing before ')'
if ($has_closing_space) {
my $rtoken_space = copy_token_as_type( $rtoken_q, 'b', SPACE );
$rtoken_space->[_LEVEL_] = $level_words;
push @{$rLL}, $rtoken_space;
}
my $seqno = $in_qw_seqno;
$self->[_K_closing_container_]->{$seqno} = @{$rLL};
$nesting_depth = $rdepth_of_opening_seqno->[$seqno];
$self->[_rI_closing_]->[$seqno] = @{$rSS};
push @{$rSS}, -1 * $seqno;
# make the ')'
my $rtoken_closing = copy_token_as_type( $rtoken_q, '}', ')' );
$rtoken_closing->[_TYPE_SEQUENCE_] = $in_qw_seqno;
push @{$rLL}, $rtoken_closing;
# all done with this qw list
$in_qw_seqno = 0;
$in_qw_comma_count = 0;
}
else {
$in_qw_comma_count += $comma_count;
}
# The '_ending_in_quote' flag for this line is no longer needed
if ($is_continued) { $line_of_tokens->{_ending_in_quote} = 0 }
return;
} ## end sub qw_to_function
sub write_line_inner_loop {
my ( $self, $line_of_tokens_old, $line_of_tokens ) = @_;
# Copy the tokens on one line received from the tokenizer to their new
# storage locations.
# Input parameters:
# $line_of_tokens_old = line received from tokenizer
# $line_of_tokens = line of tokens being formed for formatter
my $rtokens = $line_of_tokens_old->{_rtokens};
my $jmax = @{$rtokens} - 1;
if ( $jmax < 0 ) {
# safety check; shouldn't happen
my $lno = $line_of_tokens->{_line_number};
DEVEL_MODE && Fault("$lno: unexpected jmax=$jmax\n");
return;
}
my $line_index = $line_of_tokens_old->{_line_number} - 1;
my $rtoken_type = $line_of_tokens_old->{_rtoken_type};
my $rblock_type = $line_of_tokens_old->{_rblock_type};
my $rtype_sequence = $line_of_tokens_old->{_rtype_sequence};
my $rlevels = $line_of_tokens_old->{_rlevels};
my $rLL = $self->[_rLL_];
my $rSS = $self->[_rSS_];
my $rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
DEVEL_MODE
&& check_sequence_numbers( $rtokens, $rtoken_type,
$rtype_sequence, $line_index + 1 );
# Find the starting nesting depth ...
# It must be the value of variable 'level' of the first token
# because the nesting depth is used as a token tag in the
# vertical aligner and is compared to actual levels.
# So vertical alignment problems will occur with any other
# starting value.
if ( !defined($nesting_depth) ) {
$nesting_depth = $rlevels->[0];
$nesting_depth = 0 if ( $nesting_depth < 0 );
$rdepth_of_opening_seqno->[SEQ_ROOT] = $nesting_depth - 1;
}
# error check for -qwaf:
if ($in_qw_seqno) {
if ( $rtoken_type->[0] ne 'q' ) {
# -qwaf is expecting another 'q' token for multiline -qw
# based on the {_ending_in_quote} flag from the tokenizer
# of the previous line, but a 'q' didn't arrive.
my $lno = $line_index + 1;
Fault(
"$lno: -qwaf expecting qw continuation line but saw type '$rtoken_type->[0]'\n"
);
}
}
my $j = -1;
# NOTE: coding efficiency is critical in this loop over all tokens
foreach my $token ( @{$rtokens} ) {
# NOTE: Do not clip the 'level' variable yet if it is negative. We
# will do that later, in sub 'store_token_to_go'. The reason is
# that in files with level errors, the logic in 'weld_cuddled_else'
# uses a stack logic that will give bad welds if we clip levels
# here. (A recent update will probably not even allow negative
# levels to arrive here any longer).
my @tokary;
# Handle tokens with sequence numbers ...
# note the ++ increment hidden here for efficiency
if ( $rtype_sequence->[ ++$j ] ) {
my $seqno_old = $rtype_sequence->[$j];
my $seqno = $seqno_old;
my $sign = 1;
if ( $is_opening_token{$token} ) {
if ($added_seqno_count) {
$seqno += $added_seqno_count;
$new_seqno_from_old_seqno{$seqno_old} = $seqno;
}
if ( DEVEL_MODE && $seqno != $last_new_seqno + 1 ) {
my $lno = $line_index + 1;
Fault("$lno: seqno=$seqno last=$last_new_seqno\n");
}
$last_new_seqno = $seqno;
$self->[_K_opening_container_]->{$seqno} = @{$rLL};
$rdepth_of_opening_seqno->[$seqno] = $nesting_depth;
$nesting_depth++;
# Save a sequenced block type at its opening token.
# Note that unsequenced block types can occur in
# unbalanced code with errors but are ignored here.
if ( $rblock_type->[$j] ) {
my $block_type = $rblock_type->[$j];
# Store the block type with sequence number as hash key
$self->[_rblock_type_of_seqno_]->{$seqno} = $block_type;
# and save anynymous subs and named subs in separate
# hashes to avoid future pattern tests
if ( $matches_ASUB{$block_type} ) {
$self->[_ris_asub_block_]->{$seqno} = 1;
}
# The pre-check on space speeds up this test:
elsif ($block_type =~ /\s/
&& $block_type =~ /$SUB_PATTERN/ )
{
$self->[_ris_sub_block_]->{$seqno} = 1;
}
else {
# not a sub type
}
}
}
elsif ( $is_closing_token{$token} ) {
if ($added_seqno_count) {
$seqno =
$new_seqno_from_old_seqno{$seqno_old} || $seqno_old;
}
# The opening depth should always be defined, and
# it should equal $nesting_depth-1. To protect
# against unforeseen error conditions, however, we
# will check this and fix things if necessary. For
# a test case see issue c055.
my $opening_depth = $rdepth_of_opening_seqno->[$seqno];
if ( !defined($opening_depth) ) {
$opening_depth = $nesting_depth - 1;
$opening_depth = 0 if ( $opening_depth < 0 );
$rdepth_of_opening_seqno->[$seqno] = $opening_depth;
# This is not fatal but should not happen. The
# tokenizer generates sequence numbers
# incrementally upon encountering each new
# opening token, so every positive sequence
# number should correspond to an opening token.
my $lno = $line_index + 1;
DEVEL_MODE && Fault(<<EOM);
$lno: No opening token seen for closing token = '$token' at seq=$seqno at depth=$opening_depth
EOM
}
$self->[_K_closing_container_]->{$seqno} = @{$rLL};
$nesting_depth = $opening_depth;
$sign = -1;
}
elsif ( $token eq '?' ) {
if ($added_seqno_count) {
$seqno += $added_seqno_count;
$new_seqno_from_old_seqno{$seqno_old} = $seqno;
}
if ( DEVEL_MODE && $seqno != $last_new_seqno + 1 ) {
my $lno = $line_index + 1;
Fault("$lno: seqno=$seqno last=$last_new_seqno\n");
}
$last_new_seqno = $seqno;
$self->[_K_opening_ternary_]->{$seqno} = @{$rLL};
}
elsif ( $token eq ':' ) {
if ($added_seqno_count) {
$seqno =
$new_seqno_from_old_seqno{$seqno_old} || $seqno_old;
}
$sign = -1;
$self->[_K_closing_ternary_]->{$seqno} = @{$rLL};
}
# The only sequenced types output by the tokenizer are
# the opening & closing containers and the ternary
# types. So we would only get here if the tokenizer has
# been changed to mark some other tokens with sequence
# numbers, or if an error has been introduced in a
# hash such as %is_opening_container
else {
my $lno = $line_index + 1;
DEVEL_MODE && Fault(<<EOM);
$lno: Unexpected sequenced token '$token' of type '$rtoken_type->[$j]', sequence=$seqno arrived from tokenizer.
Expecting only opening or closing container tokens or ternary tokens with sequence numbers.
EOM
}
if ( $sign > 0 ) {
$self->[_rI_opening_]->[$seqno] = @{$rSS};
# For efficiency, we find the maximum level of
# opening tokens of any type. The actual maximum
# level will be that of their contents which is 1
# greater. That will be fixed in sub
# 'finish_formatting'.
my $level = $rlevels->[$j];
if ( $level > $self->[_maximum_level_] ) {
$self->[_maximum_level_] = $level;
$self->[_maximum_level_at_line_] = $line_index + 1;
}
}
else { $self->[_rI_closing_]->[$seqno] = @{$rSS} }
push @{$rSS}, $sign * $seqno;
$tokary[_TYPE_SEQUENCE_] = $seqno;
}
else {
$tokary[_TYPE_SEQUENCE_] = EMPTY_STRING;
}
# Here we are storing the first five variables per token. The
# remaining token variables will be added later as follows:
# _TOKEN_LENGTH_ is added by sub store_token
# _CUMULATIVE_LENGTH_ is added by sub store_token
# _CI_LEVEL_ is added by sub set_ci
# So all token variables are available for use after sub set_ci.
$tokary[_TOKEN_] = $token;
$tokary[_TYPE_] = $rtoken_type->[$j];
$tokary[_LEVEL_] = $rlevels->[$j];
$tokary[_LINE_INDEX_] = $line_index;
push @{$rLL}, \@tokary;
# handle -qwaf option for converting a qw quote (type = 'q') to
# function call
if (
$rOpts_qw_as_function
&& $rtoken_type->[$j] eq 'q'
&& (
# continuing in a qw?
$in_qw_seqno
# starting a new qw?
|| ( ( $j > 0 || !$last_ending_in_quote )
&& substr( $token, 0, 3 ) eq 'qw(' )
)
)
{
$self->qw_to_function( $line_of_tokens, $j == $jmax );
}
} ## end token loop
# Need to remember if we can trim the input line
$line_of_tokens->{_ended_in_blank_token} = $rtoken_type->[$jmax] eq 'b';
# Values needed by Logger if a logfile is saved:
if ( $self->[_save_logfile_] ) {
$line_of_tokens->{_level_0} = $rlevels->[0];
$line_of_tokens->{_ci_level_0} = 0; # fix later
$line_of_tokens->{_nesting_blocks_0} =
$line_of_tokens_old->{_nesting_blocks_0};
$line_of_tokens->{_nesting_tokens_0} =
$line_of_tokens_old->{_nesting_tokens_0};
}
return;
} ## end sub write_line_inner_loop
} ## end closure write_line
#############################################
# CODE SECTION 5: Pre-process the entire file
#############################################
sub finish_formatting {
my ( $self, $severe_error ) = @_;
# The file has been tokenized and is ready to be formatted.
# All of the relevant data is stored in $self, ready to go.
# Given:
# $severe_error = true if a severe error was encountered
# Returns:
# true if input file was copied verbatim due to errors
# false otherwise
# Some of the code in sub break_lists is not robust enough to process code
# with arbitrary brace errors. The simplest fix is to just return the file
# verbatim if there are brace errors. This fixes issue c160.
$severe_error ||= get_saw_brace_error();
# Check the maximum level. If it is extremely large we will give up and
# output the file verbatim. Note that the actual maximum level is 1
# greater than the saved value, so we fix that here.
$self->[_maximum_level_] += 1;
my $maximum_level = $self->[_maximum_level_];
my $maximum_table_index = $#maximum_line_length_at_level;
if ( !$severe_error && $maximum_level >= $maximum_table_index ) {
$severe_error ||= 1;
Warn(<<EOM);
The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
Something may be wrong; formatting will be skipped.
EOM
}
#----------------------------------------------------------------
# Output file verbatim if severe error or no formatting requested
#----------------------------------------------------------------
if ( $severe_error || $rOpts->{notidy} ) {
$self->dump_verbatim();
$self->wrapup($severe_error);
return 1;
}
{
my $rix_side_comments = $self->set_CODE_type();
$self->find_non_indenting_braces($rix_side_comments);
# Handle any requested side comment deletions. It is easier to get
# this done here rather than farther down the pipeline because IO
# lines take a different route, and because lines with deleted HSC
# become BL lines. We have already handled any tee requests in sub
# getline, so it is safe to delete side comments now.
$self->delete_side_comments($rix_side_comments)
if ( $rOpts_delete_side_comments
|| $rOpts_delete_closing_side_comments );
}
# Verify that the line hash does not have any unknown keys.
$self->check_line_hashes() if (DEVEL_MODE);
$self->interbracket_arrow_check();
{
# Make a pass through all tokens, adding or deleting any whitespace as
# required. Also make any other changes, such as adding semicolons.
# All token changes must be made here so that the token data structure
# remains fixed for the rest of this iteration.
my ( $error, $rqw_lines ) = $self->respace_tokens();
if ($error) {
$self->dump_verbatim();
$self->wrapup();
return 1;
}
# sub 'set_ci' is called after sub respace to allow use of type counts
# Token variable _CI_LEVEL_ is only defined after this call
$self->set_ci();
$self->find_multiline_qw($rqw_lines);
}
# Dump unique hash keys
if ( $rOpts->{'dump-unique-keys'} ) {
$self->dump_unique_keys();
Exit(0);
}
if ( $rOpts->{'warn-unique-keys'} ) {
$self->warn_unique_keys()
if ( $self->[_logger_object_] );
}
# Dump any requested block summary data
if ( $rOpts->{'dump-block-summary'} ) {
$self->dump_block_summary();
Exit(0);
}
# Dump variable usage info if requested
if ( $rOpts->{'dump-unusual-variables'} ) {
$self->dump_unusual_variables();
Exit(0);
}
# Act on -warn-variable-types if requested and the logger is available
# (the logger is deactivated during iterations)
$self->warn_variable_types()
if ( %{$rwarn_variable_types}
&& $self->[_logger_object_] );
if ( $rOpts->{'warn-mismatched-args'}
|| $rOpts->{'warn-mismatched-returns'} )
{
$self->warn_mismatched()
if ( $self->[_logger_object_] );
}
if ( $rOpts->{'dump-mismatched-args'} ) {
$self->dump_mismatched_args();
Exit(0);
}
if ( $rOpts->{'dump-mismatched-returns'} ) {
$self->dump_mismatched_returns();
Exit(0);
}
if ( $rOpts->{'dump-mixed-call-parens'} ) {
$self->dump_mixed_call_parens();
Exit(0);
}
# Act on -want-call-parens and --nowant-call-parens requested and the
# logger is available (the logger is deactivated during iterations)
$self->scan_call_parens()
if ( %call_paren_style
&& $self->[_logger_object_] );
$self->examine_vertical_tightness_flags();
$self->set_excluded_lp_containers();
$self->keep_old_line_breaks();
# Implement any welding needed for the -wn or -cb options
$self->weld_containers();
# Collect info needed to implement the -xlp style
$self->xlp_collapsed_lengths()
if ( $rOpts_line_up_parentheses && $rOpts_extended_line_up_parentheses );
# Locate small nested blocks which should not be broken
$self->mark_short_nested_blocks();
$self->special_indentation_adjustments();
# Verify that the main token array looks OK. If this ever causes a fault
# then place similar checks before the sub calls above to localize the
# problem.
$self->check_rLL("Before 'process_all_lines'") if (DEVEL_MODE);
# Finishes formatting and write the result to the line sink.
# Eventually this call should just change the 'rlines' data according to the
# new line breaks and then return so that we can do an internal iteration
# before continuing with the next stages of formatting.
$self->process_all_lines();
# A final routine to tie up any loose ends
$self->wrapup();
return;
} ## end sub finish_formatting
my %is_loop_type;
BEGIN {
my @q = qw( for foreach while do until );
@is_loop_type{@q} = (1) x scalar(@q);
}
sub find_level_info {
my ($self) = @_;
# Find level ranges and total variations of all code blocks in this file.
# Returns:
# ref to hash with block info, with seqno as key (see below)
# The array _rSS_ has the complete container tree for this file.
my $rSS = $self->[_rSS_];
# We will be ignoring everything except code block containers
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my @stack;
my %level_info;
# TREE_LOOP:
foreach my $sseq ( @{$rSS} ) {
my $stack_depth = @stack;
my $seq_next = $sseq > 0 ? $sseq : -$sseq;
next if ( !$rblock_type_of_seqno->{$seq_next} );
if ( $sseq > 0 ) {
# STACK_LOOP:
my $item;
foreach my $seq (@stack) {
$item = $level_info{$seq};
if ( $item->{maximum_depth} < $stack_depth ) {
$item->{maximum_depth} = $stack_depth;
}
$item->{block_count}++;
} ## end STACK LOOP
push @stack, $seq_next;
my $block_type = $rblock_type_of_seqno->{$seq_next};
# If this block is a loop nested within a loop, then we
# will mark it as an 'inner_loop'. This is a useful
# complexity measure.
my $is_inner_loop = 0;
if ( $is_loop_type{$block_type} && defined($item) ) {
$is_inner_loop = $is_loop_type{ $item->{block_type} };
}
$level_info{$seq_next} = {
starting_depth => $stack_depth,
maximum_depth => $stack_depth,
block_count => 1,
block_type => $block_type,
is_inner_loop => $is_inner_loop,
};
}
else {
my $seq_test = pop @stack;
# error check
if ( $seq_test != $seq_next ) {
# Shouldn't happen - the $rSS array must have an error
DEVEL_MODE && Fault("stack error finding total depths\n");
%level_info = ();
last;
}
}
} ## end TREE_LOOP
return \%level_info;
} ## end sub find_level_info
sub find_loop_label {
my ( $self, $seqno ) = @_;
# Given:
# $seqno = sequence number of a block of code for a loop
# Return:
# $label = the loop label text, if any, or an empty string
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $label = EMPTY_STRING;
my $K_opening = $K_opening_container->{$seqno};
# backup to the line with the opening paren, if any, in case the
# keyword is on a different line
my $Kp = $self->K_previous_code($K_opening);
return $label unless ( defined($Kp) );
if ( $rLL->[$Kp]->[_TOKEN_] eq ')' ) {
$seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
$K_opening = $K_opening_container->{$seqno};
}
return $label unless ( defined($K_opening) );
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
# look for a label within a few lines; allow a couple of blank lines
foreach my $lx ( reverse( $lx_open - 3 .. $lx_open ) ) {
last if ( $lx < 0 );
my $line_of_tokens = $rlines->[$lx];
my $line_type = $line_of_tokens->{_line_type};
# stop search on a non-code line
last if ( $line_type ne 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast_uu ) = @{$rK_range};
# skip a blank line
next if ( !defined($Kfirst) );
# check for a label
if ( $rLL->[$Kfirst]->[_TYPE_] eq 'J' ) {
$label = $rLL->[$Kfirst]->[_TOKEN_];
last;
}
# quit the search if we are above the starting line
last if ( $lx < $lx_open );
}
return $label;
} ## end sub find_loop_label
{ ## closure find_mccabe_count
my %is_mccabe_logic_keyword;
my %is_mccabe_logic_operator;
BEGIN {
my @q = (qw( && || ||= &&= ? <<= >>= ));
@is_mccabe_logic_operator{@q} = (1) x scalar(@q);
@q = (qw( and or xor if else elsif unless until while for foreach ));
@is_mccabe_logic_keyword{@q} = (1) x scalar(@q);
} ## end BEGIN
sub find_mccabe_count {
my ($self) = @_;
# Find the cumulative mccabe count to each token
# Return '$rmccabe_count_sum' = ref to array with cumulative
# mccabe count to each token $K
# NOTE: This sub currently follows the definitions in Perl::Critic
my $rmccabe_count_sum;
my $rLL = $self->[_rLL_];
my $count = 0;
my $Klimit = $self->[_Klimit_];
foreach my $KK ( 0 .. $Klimit ) {
$rmccabe_count_sum->{$KK} = $count;
my $type = $rLL->[$KK]->[_TYPE_];
if ( $type eq 'k' ) {
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $is_mccabe_logic_keyword{$token} ) { $count++ }
}
else {
if ( $is_mccabe_logic_operator{$type} ) {
$count++;
}
}
}
$rmccabe_count_sum->{ $Klimit + 1 } = $count;
return $rmccabe_count_sum;
} ## end sub find_mccabe_count
} ## end closure find_mccabe_count
sub find_code_line_count {
my ($self) = @_;
# Find the cumulative number of lines of code, excluding blanks,
# comments and pod.
# Return '$rcode_line_count' = ref to array with cumulative
# code line count for each input line number.
my $rcode_line_count;
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $ix_line = -1;
my $code_line_count = 0;
# loop over all lines
foreach my $line_of_tokens ( @{$rlines} ) {
$ix_line++;
# what type of line?
my $line_type = $line_of_tokens->{_line_type};
# if 'CODE' it must be non-blank and non-comment
if ( $line_type eq 'CODE' ) {
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( defined($Kfirst) ) {
# it is non-blank
my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
if ( $jmax > 0 || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
# ok, it is a non-comment
$code_line_count++;
}
}
}
# Count all other special line types except pod;
# For a list of line types see sub 'process_all_lines'
else {
if ( $line_type !~ /^POD/ ) { $code_line_count++ }
}
# Store the cumulative count using the input line index
$rcode_line_count->[$ix_line] = $code_line_count;
}
return $rcode_line_count;
} ## end sub find_code_line_count
sub find_selected_packages {
my ( $self, $rdump_block_types ) = @_;
# Returns a list of all selected package statements in a file for use
# in dumping block information.
if ( !$rdump_block_types->{'*'}
&& !$rdump_block_types->{'package'}
&& !$rdump_block_types->{'class'} )
{
return [];
}
# Find all 'package' tokens in the file
my $rLL = $self->[_rLL_];
my @K_package_list;
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
next if ( $rLL->[$KK]->[_TYPE_] ne 'P' );
push @K_package_list, $KK;
}
# Get the information needed for the block dump
my $rpackage_lists = $self->package_info_maker( \@K_package_list );
my $rpackage_info_list = $rpackage_lists->{'rpackage_info_list'};
# Remove the first item in the info list, which is a dummy package main
shift @{$rpackage_info_list};
# Remove BLOCK format packages since they get reported as blocks separately
my @filtered_list = grep { !$_->{is_block} } @{$rpackage_info_list};
return \@filtered_list;
} ## end sub find_selected_packages
sub find_selected_blocks {
my ( $self, $rdump_block_types, $rlevel_info ) = @_;
# Find blocks needed for --dump-block-summary
# Given:
# $rdump_block_types = hash of user selected block types
# $rlevel_info = info on max depth of blocks
# Returns:
# $rslected_blocks = ref to a list of information on the selected blocks
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $ris_sub_block = $self->[_ris_sub_block_];
my $dump_all_types = $rdump_block_types->{'*'};
my @selected_blocks;
#---------------------------------------------------
# BEGIN loop over all blocks to find selected blocks
#---------------------------------------------------
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $type;
my $name = EMPTY_STRING;
my $block_type = $rblock_type_of_seqno->{$seqno};
my $K_opening = $K_opening_container->{$seqno};
my $K_closing = $K_closing_container->{$seqno};
my $level = $rLL->[$K_opening]->[_LEVEL_];
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
my $line_of_tokens = $rlines->[$lx_open];
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || !defined($Klast) || $Kfirst > $K_opening ) {
my $line_type = $line_of_tokens->{_line_type};
# shouldn't happen
my $CODE_type = $line_of_tokens->{_code_type};
DEVEL_MODE && Fault(<<EOM);
unexpected line_type=$line_type at line $lx_open, code type=$CODE_type
EOM
next;
}
my ( $max_change, $block_count, $inner_loop_plus ) =
( 0, 0, EMPTY_STRING );
my $item = $rlevel_info->{$seqno};
if ( defined($item) ) {
my $starting_depth = $item->{starting_depth};
my $maximum_depth = $item->{maximum_depth};
$block_count = $item->{block_count};
$max_change = $maximum_depth - $starting_depth + 1;
# this is a '+' character if this block is an inner loops
$inner_loop_plus = $item->{is_inner_loop} ? '+' : EMPTY_STRING;
}
# Skip closures unless type 'closure' is explicitly requested
if ( ( $block_type eq '}' || $block_type eq ';' )
&& $rdump_block_types->{'closure'} )
{
$type = 'closure';
}
# Both 'sub' and 'asub' select an anonymous sub.
# This allows anonymous subs to be explicitly selected
elsif (
$ris_asub_block->{$seqno}
&& ( $dump_all_types
|| $rdump_block_types->{'sub'}
|| $rdump_block_types->{'asub'} )
)
{
$type = 'asub';
# Look back to try to find some kind of name, such as
# my $var = sub { - var is type 'i'
# var => sub { - var is type 'w'
# -var => sub { - var is type 'w'
# 'var' => sub { - var is type 'Q'
my ( $saw_equals, $saw_fat_comma, $blank_count );
foreach my $KK ( reverse( $Kfirst .. $K_opening - 1 ) ) {
my $token_type = $rLL->[$KK]->[_TYPE_];
if ( $token_type eq 'b' ) { $blank_count++; next }
if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
if ( $token_type eq '=' ) { $saw_equals++; next }
if ( $token_type eq 'i' && $saw_equals
|| ( $token_type eq 'w' || $token_type eq 'Q' )
&& $saw_fat_comma )
{
$name = $rLL->[$KK]->[_TOKEN_];
last;
}
}
my $rarg = { seqno => $seqno };
$self->count_sub_input_args($rarg);
my $count = $rarg->{shift_count_min};
if ( !defined($count) ) { $count = '*' }
$type .= '(' . $count . ')';
}
elsif ( $ris_sub_block->{$seqno}
&& ( $dump_all_types || $rdump_block_types->{'sub'} ) )
{
$type = 'sub';
# what we want:
# $block_type $name
# 'sub setidentifier($)' => 'setidentifier'
# 'method setidentifier($)' => 'setidentifier'
my @parts = split /\s+/, $block_type;
$name = $parts[1];
$name =~ s/\(.*$//;
my $rarg = { seqno => $seqno };
$self->count_sub_input_args($rarg);
my $count = $rarg->{shift_count_min};
if ( !defined($count) ) { $count = '*' }
$type .= '(' . $count . ')';
}
elsif (
$block_type =~ /^(package|class)\b/
&& ( $dump_all_types
|| $rdump_block_types->{'package'}
|| $rdump_block_types->{'class'} )
)
{
$type = 'class';
my @parts = split /\s+/, $block_type;
$name = $parts[1];
$name =~ s/\(.*$//;
}
elsif (
$is_loop_type{$block_type}
&& ( $dump_all_types
|| $rdump_block_types->{$block_type}
|| $rdump_block_types->{ $block_type . $inner_loop_plus }
|| $rdump_block_types->{$inner_loop_plus} )
)
{
$type = $block_type . $inner_loop_plus;
}
elsif ( $dump_all_types || $rdump_block_types->{$block_type} ) {
if ( $is_loop_type{$block_type} ) {
$name = $self->find_loop_label($seqno);
}
$type = $block_type;
}
else {
next;
}
push @selected_blocks,
{
K_opening => $K_opening,
K_closing => $K_closing,
line_start => $lx_open + 1,
name => $name,
type => $type,
level => $level,
max_change => $max_change,
block_count => $block_count,
};
} ## END loop to get info for selected blocks
return \@selected_blocks;
} ## end sub find_selected_blocks
sub find_if_chains {
my ( $self, $rdump_block_types, $rlevel_info ) = @_;
# Find if-chains for --dump-block-summary
# Given:
# $rdump_block_types = ref to hash with user block type selections
# $rlevel_info = info on max depth of blocks
# Returns:
# $rslected_blocks = ref to a list of information on the selected blocks
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# For example, 'elsif4' means all if-chains with 4 or more 'elsif's
my @selected_blocks;
# See if user requested any if-chains
# allow 'elsif3' or 'elsif+3'
my @elsif_d = grep { /^elsif\+?\d+$/ } keys %{$rdump_block_types};
if ( !@elsif_d ) { return \@selected_blocks }
# In case of multiple selections, use the minimum
my $elsif_count_min;
foreach my $word (@elsif_d) {
if ( $word =~ /(\d+)$/ ) {
my $num = $1;
if ( !defined($elsif_count_min) || $elsif_count_min > $num ) {
$elsif_count_min = $num;
}
}
}
# Loop over blocks
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $block_type = $rblock_type_of_seqno->{$seqno};
# Must be 'if' or 'unless'
next unless ( $block_type eq 'if' || $block_type eq 'unless' );
# Collect info for this if-chain
my $rif_chain =
$self->follow_if_chain( $seqno, $rlevel_info, $elsif_count_min );
next unless ($rif_chain);
push @selected_blocks, $rif_chain;
}
return \@selected_blocks;
} ## end sub find_if_chains
sub follow_if_chain {
my ( $self, $seqno_if, $rlevel_info, $elsif_count_min ) = @_;
# Follow a chain of if-elsif-elsif-...-else blocks.
# Given:
# $seqno_if = sequence number of an 'if' block
# $rlevel_info = hash of block level information
# $elsif_min_count = minimum number of 'elsif' blocks wanted
# Return:
# nothing if number of 'elsif' blocks is less than $elsif_count_min
# ref to block info hash otherwise
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# Verify that seqno is an 'if' or 'unless'
my $block_type = $rblock_type_of_seqno->{$seqno_if};
if ( $block_type ne 'if' && $block_type ne 'unless' ) {
Fault(
"Bad call: expecting block type 'if' or 'unless' but got '$block_type' for seqno=$seqno_if\n"
);
return;
}
# save sequence numbers in the chain for debugging
my @seqno_list;
# Loop to follow the chain
my $max_change = 0;
my $block_count = 0;
my $elsif_count = 0;
# we are tracing the sequence numbers of consecutive if/elsif/else blocks
my $seqno = $seqno_if;
while ($seqno) {
push @seqno_list, $seqno;
# Update info for this block
$block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type eq 'elsif' ) { $elsif_count++ }
my $item = $rlevel_info->{$seqno};
if ( defined($item) ) {
my $starting_depth = $item->{starting_depth};
my $maximum_depth = $item->{maximum_depth};
$block_count += $item->{block_count};
my $mxc = $maximum_depth - $starting_depth + 1;
if ( $mxc > $max_change ) { $max_change = $mxc }
}
# Chain ends if this is an 'else' block
last if ( $block_type eq 'else' );
# Look at the token following the closing brace
my $Kc = $K_closing_container->{$seqno};
my $K_k = $self->K_next_code($Kc);
last unless defined($K_k);
my $type_k = $rLL->[$K_k]->[_TYPE_];
my $token_k = $rLL->[$K_k]->[_TOKEN_];
# Chain ends unless we arrive at keyword 'elsif' or 'else'
last
unless ( $type_k eq 'k'
&& ( $token_k eq 'elsif' || $token_k eq 'else' ) );
# Handle keyword 'else' : next token be the opening block brace
if ( $token_k eq 'else' ) {
# } else {
# ^ ^ ^
# Kc | |
# K_k Ko
my $Ko = $self->K_next_code($K_k);
last unless defined($Ko);
$seqno = $rLL->[$Ko]->[_TYPE_SEQUENCE_];
if ( $seqno && $rblock_type_of_seqno->{$seqno} eq 'else' ) {
next;
}
# Shouldn't happen unless file has an error
last;
}
# Handle keyword 'elsif':
# } elsif ( $something ) {
# ^ ^ ^ ^ ^
# Kc | | | |
# K_k Kpo Kpc Ko
# hop over the elsif parens
my $kpo = $self->K_next_code($K_k);
last unless defined($kpo);
my $seqno_p = $rLL->[$kpo]->[_TYPE_SEQUENCE_];
last unless ( $seqno_p && $rLL->[$kpo]->[_TOKEN_] eq '(' );
my $Kpc = $K_closing_container->{$seqno_p};
last unless defined($Kpc);
# should be at the opening elsif brace
my $Ko = $self->K_next_code($Kpc);
last unless defined($Ko);
$seqno = $rLL->[$Ko]->[_TYPE_SEQUENCE_];
if ( $seqno && $rblock_type_of_seqno->{$seqno} eq 'elsif' ) {
next;
}
# Shouldn't happen unless file has an error
last;
} ## end while ($seqno)
# check count
return if ( $elsif_count < $elsif_count_min );
# Store the chain
my $K_opening = $K_opening_container->{$seqno_if};
my $K_closing = $K_closing_container->{$seqno};
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
my $level = $rLL->[$K_opening]->[_LEVEL_];
my $rchain = {
K_opening => $K_opening,
K_closing => $K_closing,
line_start => $lx_open + 1,
name => "elsif+$elsif_count",
type => "if-chain",
level => $level,
max_change => $max_change,
block_count => $block_count,
};
return $rchain;
} ## end sub follow_if_chain
sub get_interpolated_hash_keys {
my ($str) = @_;
# Find hash keys of interpolated variables in a quoted string
# Given:
# $str=a quoted string with possible interpolated vars
# Return:
# ref to list of interpolated hash keys
# Example: for the string:
# "$rhash->{key1} and $other_hash{'key2'} and ${$rlist}"
# finds 'key1' and 'key2' and not '$rlist'
my @keys;
while ( $str =~ m/ \$[A-Za-z_]\w* (?:->)? \{ ([^\$\@\}][^\}]*) \}/gcx ) {
my $key = $1;
my $ch1 = substr( $key, 0, 1 );
if ( $ch1 eq "'" ) {
$key = substr( $key, 1, -1 );
}
push @keys, $key;
} ## end while ( $str =~ ...)
return \@keys;
} ## end sub get_interpolated_hash_keys
sub scan_unique_keys {
my ($self) = @_;
# Scan for hash keys needed to implement --dump-unique-keys, -duk
use constant DEBUG_WUK => 0;
# There are the main phases of the operation:
# PHASE 1: We scan the file and store all hash keys found in the hash
# %{$rhash_key_trove}, including a count for each. These are keys which:
# - occur before a fat comma, such as : "word => $val", and
# - text which occurs within hash braces, like "$hash{word}" or
# a slice like @hash{word1, word2};
# During this scan we save pointers to all quotes and here docs,
# for use in the second phase.
# PHASE 2: We find the keys which occur just once, and store their
# index in the hash %K_unique_key. Then we compare all quoted text
# with these unique keys. If a key matches a quoted string, then
# it is removed from the set of unique keys.
# PHASE 3: We apply a filter to remove sets of multiple related keys
# for which most keys are unique. These are most likely used for
# communication with other code and thus unlikely to be errors.
# PHASE 4: Any remaining keys are output along with their line number.
# Current limitation:
# - Hash keys which occur within quoted text or here docs are processed as
# quotes rather than as primary keys.
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_qwaf_by_seqno = $self->[_ris_qwaf_by_seqno_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
return if ( !defined($Klimit) || $Klimit < 1 );
# stack holds keys _seqno, _KK, _KK_last_nb, _is_slice
my @stack; # token stack during PHASE 1 scan
my $rhash_key_trove = {}; # all hash keys found in PHASE 1
my %K_unique_key; # token index of the unique hash keys
my @Q_list; # list of type Q quote tokens in PHASE 1
my @mw_list; # list of type Q quotes associated with -qwaf
my @Q_getopts; # list of any quote args to a sub getopts
my %first_key_by_id; # debug info
my %saw_use_module; # modules used; updated during main loop scan
my %is_known_module_key; # set by sub $set_known_module_keys after scan
# See https://perldoc.perl.org/perlref
my %is_typeglob_slot_key;
my @q = qw( SCALAR ARRAY HASH CODE IO FILEHANDLE GLOB FORMAT NAME PACKAGE );
@is_typeglob_slot_key{@q} = (1) x scalar(@q);
# Table of keys of hashes which are always available
my %is_fixed_key = (
ALRM => { '$SIG' => 1 },
TERM => { '$SIG' => 1 },
INT => { '$SIG' => 1 },
__DIE__ => { '$SIG' => 1 },
__WARN__ => { '$SIG' => 1 },
HOME => { '$ENV' => 1 },
USER => { '$ENV' => 1 },
LOGNAME => { '$ENV' => 1 },
PATH => { '$ENV' => 1 },
SHELL => { '$ENV' => 1 },
PERL5LIB => { '$ENV' => 1 },
PERLLIB => { '$ENV' => 1 },
);
# Keys of some known modules
# Note that ExtUtils::MakeMaker has a large number of keys
# but they are not included here because they will typically
# be removed with the filter
my %known_module_keys = (
# Common core modules
'File::Temp' =>
[qw( CLEANUP DIR EXLOCK PERMS SUFFIX TEMPLATE TMPDIR UNLINK )],
'File::Path' => [
qw(
chmod error group keep_root mode owner
result safe uid user verbose
)
],
'Test::More' => [qw( tests skip_all import )],
'Test::EOL' => [qw( trailing_whitespace all_reasons )],
'Test' => [qw( tests onfail todo )],
'warnings' => [qw( FATAL NONFATAL )],
'open' => [qw( IN OUT IO )],
'Unicode::Collate' => [
qw(
UCA_Version alternate
backwards entry
hangul_terminator highestFFFF
identical ignoreName
ignoreChar ignore_level2
katakana_before_hiragana level
long_contraction minimalFFFE
normalization overrideCJK
overrideHangul preprocess
rearrange rewrite
suppress table
undefName undefChar
upper_before_lower variable
),
qw(locale),
],
'Config::Perl::V' => [
qw(
build config derived environment
inc options osname patches
stamp version
)
],
'HTTP::Tiny' => [
qw(
SSL_options agent cookie_jar default_headers
http_proxy https_proxy keep_alive local_address
max_redirect max_size no_proxy proxy
timeout verify_SSL
),
qw( content data_callback peer successders trailer_callback ),
qw( content headers protocol reason redirects status url ),
],
'Math::BigInt' => [qw( lib try only upgrade )],
'Math::BigFloat' => [qw( lib try only )],
'Memoize' => [qw( NORMALIZER INSTALL SCALAR_CACHE LIST_CACHE )],
# Other common modules
'DateTime' => [
qw(
year month week day
hour minute second nanosecond
years months weeks days
hours minutes seconds nanoseconds
time_zone epoch name object
to day_of_year end_of_month formatter
)
],
'Moo' => [
qw(
builder clearer coerce default handles init_arg
is isa lazy moosify predicate reader
required trigger weak_ref writer
)
],
'Compress::Zlib' => [
qw(
-Level -Method -WindowBits -MemLevel
-Strategy -Dictionary -Bufsize
)
],
);
# List of any parent modules with keys to load for a module.
# This can be extended as necessary.
my %parent_modules = (
# Unicode::Collate::Local is a subclass of Unicode::Colate
'Unicode::Collate::Locale' => ['Unicode::Collate'],
);
# Some keys associated with modules starting with a certain text
# These are used in the last step of filtering
my %modules_with_common_keys = (
CCFLAGS => ['ExtUtils::'],
INSTALLDIRS => ['ExtUtils::'],
tests => ['Test::'],
);
my $add_known_keys = sub {
my ( $rhash, $name ) = @_;
foreach my $key ( keys %{$rhash} ) {
if ( !defined( $is_fixed_key{$key} ) ) {
$is_fixed_key{$key} = { $name => 1 };
}
else {
$is_fixed_key{$key}->{$name} = 1;
}
}
}; ## end $add_known_keys = sub
# Add keys which may be unique to this environment.
$add_known_keys->( \%SIG, '$SIG' );
$add_known_keys->( \%ENV, '$ENV' );
$add_known_keys->( \%ERRNO, '$!' );
my $set_known_module_keys = sub {
# Look through the hash of 'use module' statements and populate
# %is_known_module_key, a hash of keys which are not unique if certain
# modules are used. This is called just after we have finished
# scanning the file to help remove known keys.
foreach my $module_seen ( keys %saw_use_module ) {
# Add keys for this module if known
my $rkeys = $known_module_keys{$module_seen};
if ( defined($rkeys) ) {
foreach my $key ( @{$rkeys} ) {
$is_known_module_key{$key} = 1;
}
}
# And add keys for any parent classes
my $rparent_list = $parent_modules{$module_seen};
if ( defined($rparent_list) ) {
foreach my $name ( @{$rparent_list} ) {
my $rk = $known_module_keys{$name};
if ( defined($rk) ) {
foreach my $key ( @{$rk} ) {
$is_known_module_key{$key} = 1;
}
}
}
}
}
return;
}; ## end $set_known_module_keys = sub
my $get_hash_name = sub {
# Get a name of the hash corresponding to a key in hash braces, if
# possible. This will be used to identify related hash keys.
# We have just encountered token at $KK and about to close the stack.
# $rOpts->{'something'}
# | | |
# $Khash $Kbrace $KK
return if ( !@stack );
my $Kbrace = $stack[-1]->{_KK};
my $Khash = $stack[-1]->{_KK_last_nb};
return if ( !defined($Kbrace) );
return if ( !defined($Khash) );
return if ( $rLL->[$Kbrace]->[_TYPE_] ne 'L' );
my $Khash_end = $Khash;
my $token = $rLL->[$Khash]->[_TOKEN_];
# Walk back to find a '$'
if ( $token eq '->' ) {
$Khash = $self->K_previous_code($Khash);
return if ( !defined($Khash) );
$token = $rLL->[$Khash]->[_TOKEN_];
}
if ( $token eq '}' ) {
my $seqno = $rLL->[$Khash]->[_TYPE_SEQUENCE_];
return if ( !defined($seqno) );
my $Ko = $K_opening_container->{$seqno};
return if ( !$Ko );
$Khash = $Ko - 1;
$token = $rLL->[$Khash]->[_TOKEN_];
}
my $ch1 = substr( $token, 0, 1 );
return if ( $ch1 ne '$' && $ch1 ne '*' );
# Construct the final name, removing any spaces
my $hash_name = $token;
my $count = 0;
foreach my $Kh ( $Khash + 1 .. $Khash_end ) {
$hash_name .= $rLL->[$Kh]->[_TOKEN_];
$count++;
}
if ( $count > 0 ) { $hash_name =~ s/\s//g }
return $hash_name;
}; ## end $get_hash_name = sub
my $is_hash_slice = sub {
# We are at an opening hash brace.
# Look back to see if this is a slice.
# Return:
# a name for the slice, or
# undef if not a slice
my ($Ktest) = @_;
my $token = $rLL->[$Ktest]->[_TOKEN_];
# walk back to find a '$'
if ( $token eq '->' ) {
$Ktest = $self->K_previous_code($Ktest);
return if ( !defined($Ktest) );
$token = $rLL->[$Ktest]->[_TOKEN_];
}
if ( $token eq '}' ) {
my $seqno = $rLL->[$Ktest]->[_TYPE_SEQUENCE_];
return if ( !defined($seqno) );
my $Ko = $K_opening_container->{$seqno};
return if ( !$Ko );
$Ktest = $Ko - 1;
$token = $rLL->[$Ktest]->[_TOKEN_];
}
my $ch1 = substr( $token, 0, 1 );
# NOTE: at present, we require an @ sigil to recognize a hash slice.
if ( $ch1 eq '@' ) {
# convert sigil to '$' to match other group members
my $id = '$' . substr( $token, 1 );
return $id;
}
return;
}; ## end $is_hash_slice = sub
my %ancestor_seqno_cache;
my $get_ancestor_seqno = sub {
my ($seqno_in) = @_;
# The goal is to find the outermost common sequence number of
# a tree with hash keys and values. This is needed to help filter
# out large static data trees.
# Given:
# $seqno_in = the sequence number of a list with hash key items
# Task:
# Walk back up the tree in search of the outermost list container
# Return:
# $seqno_out = The most outer ancestor matching ancestor seqno
# Be sure we have a valid starting sequence number
if ( !$seqno_in ) {
return;
}
# Handle a possible parenless-call:
# NOTE: A better strategy might be to keep track of the most recent
# keyword or function name and use it.
if ( $seqno_in <= SEQ_ROOT || !$ris_list_by_seqno->{$seqno_in} ) {
return $seqno_in;
}
# Continue for a normal list..
# use any cached value for efficiency
my $seqno_cache = $ancestor_seqno_cache{$seqno_in};
if ( defined($seqno_cache) ) { return $seqno_cache }
# This will be the outermost container found so far:
my $seqno_out = $seqno_in;
# Loop upward..
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
while ( my $seqno = $rparent_of_seqno->{$seqno_out} ) {
last if ( $seqno == SEQ_ROOT );
if ( $seqno >= $seqno_out || $seqno < SEQ_ROOT ) {
## shouldn't happen - parent containers have lower seq numbers
DEVEL_MODE && Fault(<<EOM);
Error in 'get_ancestor_seqno': expecting seqno=$seqno < last seqno=$seqno_out
EOM
last;
}
last if ( !$ris_list_by_seqno->{$seqno} );
# Be sure this container is part of a list structure, and not for
# example a sub call within a list. The previous token should
# be an opening token or comma or fat comma
my $Ko = $K_opening_container->{$seqno_out};
my $Kp = $self->K_previous_code($Ko);
my $tokp = $Kp ? $rLL->[$Kp]->[_TOKEN_] : ';';
if ( $tokp eq ','
|| $tokp eq '=>'
|| $is_opening_token{$tokp} )
{
# looks ok, keep going
$seqno_out = $seqno;
next;
}
last;
} ## end while ( my $seqno = $rparent_of_seqno...)
$ancestor_seqno_cache{$seqno_in} = $seqno_out;
return $seqno_out;
}; ## end $get_ancestor_seqno = sub
my $is_fixed_hash = sub {
my ( $key, $all_caps, $id ) = @_;
# Given a hash key '$key',
# Return:
# true if it is known and should be excluded
# false if it is not known
my $rhash_names = $is_fixed_key{$key};
# allow any key in all caps to match %ENV
return if ( !$rhash_names && !$all_caps );
# The key is known, now see if its hash name is known
return if ( !$id );
return 1 if ( $all_caps && $id eq '$ENV' );
return 1 if ( $rhash_names->{$id} );
return;
}; ## end $is_fixed_hash = sub
my $is_known_key = sub {
my ($key) = @_;
# Return:
# true if $key is a known key and not unique
# false otherwise
# This sub must be called after the file is scanned, so that all
# 'use' statements have been seen.
my $info = $rhash_key_trove->{$key};
if ( !defined($info) ) {
DEVEL_MODE && Fault("shouldn't happen\n");
return;
}
my $count = $info->{count};
if ( $count > 1 ) {
DEVEL_MODE && Fault("shouldn't happen\n");
return 1;
}
#-----------------------------------------------------------------
# Category 1: keys associated with certain 'use module' statements
#-----------------------------------------------------------------
if ( $is_known_module_key{$key} ) {
return 1;
}
my $id = $info->{hash_id};
return if ( !$id );
#-----------------------------------------------------------------------
# Category 2: # typeglob key: *foo{SCALAR}, or *{$stash->{$var}}{ARRAY}
#-----------------------------------------------------------------------
if ( $is_typeglob_slot_key{$key} && substr( $id, 0, 1 ) eq '*' ) {
return 1;
}
#-----------------------------------------------------------
# Category 3: a key for a fixed hash like %ENV, %SIG, %ERRNO
#-----------------------------------------------------------
my $all_caps = $key =~ /^[A-Z_]+$/;
if ( ( $is_fixed_key{$key} || $all_caps )
&& $is_fixed_hash->( $key, $all_caps, $id ) )
{
return 1;
}
#---------------------------
# Category 4: $Config values
#---------------------------
if ( $id eq '$Config' || $id eq '$Config::Config' ) {
return 1;
}
return;
}; ## end $is_known_key = sub
my $push_KK_last_nb = sub {
# If the previous nonblank token was a hash key of type
# 'Q' or 'w', then update its count
my ( $KK_last_nb, ($parent_seqno) ) = @_;
# Given:
# $KK_last_nb = index of a hash key token
# $parent_seqno = sequence number of container:
# - required for a key followed by '=>'
# - not required for a key in hash braces
my $type_last = $rLL->[$KK_last_nb]->[_TYPE_];
my $token_last = $rLL->[$KK_last_nb]->[_TOKEN_];
my $word;
if ( $type_last eq 'w' ) {
$word = $token_last;
# Combine a leading '-' if any
if ( @mw_list && $mw_list[-1] eq $KK_last_nb ) {
$word = '-' . $word;
# and remove it from the list of quoted words
pop @mw_list;
}
}
elsif ( $type_last eq 'Q' ) {
return if ( length($token_last) < 2 );
# Assume that this is not a multiline Q, since this is a hash key.
my $is_interpolated;
my $ch0 = substr( $token_last, 0, 1 );
if ( $ch0 eq '"' ) {
$word = substr( $token_last, 1, -1 );
$is_interpolated = 1;
}
elsif ( $ch0 eq "'" ) {
$word = substr( $token_last, 1, -1 );
}
else {
my $rQ_info = Q_spy($token_last);
if ( defined($rQ_info) && $rQ_info->{is_simple} ) {
$is_interpolated = $rQ_info->{is_interpolated};
my $nch = $rQ_info->{nch};
$word = substr( $token_last, $nch, -1 );
}
}
# Ignore text with interpolated values
if ($is_interpolated) {
foreach my $sigil ( '$', '@' ) {
my $pos = index( $word, $sigil );
next if ( $pos < 0 );
return if ( $pos == 0 );
my $ch_test = substr( $word, $pos - 1, 1 );
return if ( $ch_test ne '\\' );
}
}
# We accept this as a hash key, so remove it from the quote list
if ( @Q_list && $Q_list[-1]->[0] eq $KK_last_nb ) {
pop @Q_list;
}
else {
## Shouldn't happen
}
}
else {
# not a quote - possibly identifier
return;
}
return unless ($word);
# Bump count of known keys by 1 so that they will not appear as unique
if ( !defined( $rhash_key_trove->{$word} ) ) {
my $slice_name = @stack ? $stack[-1]->{_slice_name} : EMPTY_STRING;
my $id = $parent_seqno;
if ($slice_name) {
$id = $slice_name;
}
elsif ( !$id ) {
$id = $get_hash_name->();
}
else {
$id = $get_ancestor_seqno->($parent_seqno);
}
$rhash_key_trove->{$word} = {
count => 1,
hash_id => $id,
K => $KK_last_nb,
is_known => 0,
};
# save debug info
if ( defined($id) && !defined( $first_key_by_id{$id} ) ) {
$first_key_by_id{$id} = $word;
}
}
else {
$rhash_key_trove->{$word}->{count}++;
}
return;
}; ## end $push_KK_last_nb = sub
my $delete_getopt_subword = sub {
my ($word_in) = @_;
# Given:
# $word= a string which may or may not be a key to Getopts::Long,
# Return:
# Find any corresponding hash key and remove from the unique key list
# Input: Output:
# 'func-mask|M=s' 'func-mask'
# 'foo=s{2,4}' 'foo'
my $word = $word_in;
# split on pipe symbols; the first word is the key
my @parts = split /\|/, $word;
return if ( !@parts );
$word = $parts[0];
# remove one or two optional leading dashes
$word =~ s/^--?//;
# remove any trailing flag
if ( @parts == 1 ) {
$word =~ s/^([\w_\-]+)(?:[\!|\+]|=s|:s|=i|:i|=f|:f)/$1/;
}
# give up if the possible key name does not look reasonable
if ( !$word || $word !~ /^[\w\-]+$/ ) {
return;
}
if ( $K_unique_key{$word} ) {
delete $K_unique_key{$word};
}
return;
}; ## end $delete_getopt_subword = sub
my $dubious_key = sub {
my ($key) = @_;
# Given:
# $key = a key which is unique and about to be filtered out
# Return:
# true if we should not filter it out for some reason
# false if it is ok to filter it out
# Do not remove a key with mixed interior underscores and dashes,
# such as 'encode-output_strings', since this is a common typo.
my $len = length($key);
my $pos_dash = index( $key, '-', 1 );
my $pos_underscore = index( $key, '_', 1 );
my $interior_dash = $pos_dash > 0 && $pos_dash < $len - 1;
my $interior_underscore =
$pos_underscore > 0 && $pos_underscore < $len - 1;
if ( $interior_dash && $interior_underscore ) { return 1 }
# additonal checks can go here
# ok to filter this key out
return;
}; ## end $dubious_key = sub
my $delete_key_if_saw_call = sub {
my ( $key, $subname ) = @_;
# Look for something like "plan('tests'=>" or "plan tests=>"
return if ( !defined( $K_unique_key{$key} ) );
my $K = $K_unique_key{$key};
my $Kp = $self->K_previous_nonblank($K);
if ( defined($Kp) && $rLL->[$Kp]->[_TOKEN_] eq '(' ) {
$Kp = $self->K_previous_nonblank($Kp);
}
if ( defined($Kp) && $rLL->[$Kp]->[_TOKEN_] eq $subname ) {
delete $K_unique_key{$key};
}
return;
}; ## end $delete_key_if_saw_call = sub
my $filter_out_large_sets = sub {
# Look for containers of multiple hash keys which are only defined
# once, and remove them from further consideration. These are probably
# for communication with other packages and thus not of interest. The
# idea is that it is unlikely that the user has misspelled an entire
# set of keys.
my @debug_output;
# Count keys by container:
# _pre_q count is the count before using quotes
# _post_q count is the count after using quotes
my %total_count_by_id;
my %unique_count_by_id_pre_q;
my %unique_count_by_id_post_q;
foreach my $key ( keys %{$rhash_key_trove} ) {
my $count = $rhash_key_trove->{$key}->{count};
my $hash_id = $rhash_key_trove->{$key}->{hash_id};
next if ( !$hash_id );
$total_count_by_id{$hash_id}++;
$unique_count_by_id_pre_q{$hash_id}++ if ( $count == 1 );
$unique_count_by_id_post_q{$hash_id}++
if ( $K_unique_key{$key} );
}
# Find sets of keys which are all, or nearly all, unique.
my %delete_this_id;
foreach my $id ( keys %total_count_by_id ) {
my $total_count = $total_count_by_id{$id};
#---------------------------------------
# This is only for sets of multiple keys
#---------------------------------------
next if ( $total_count <= 1 );
my $unique_count_pre_q = $unique_count_by_id_pre_q{$id};
my $unique_count_post_q = $unique_count_by_id_post_q{$id};
next if ( !$unique_count_pre_q );
if ( !defined($unique_count_post_q) ) {
$unique_count_post_q = 0;
}
# Filter rule: do not issue a warning for a related group
# of keys which has more than N unique keys. The default
# value of N is 2. Delete keys which get filtered out.
$delete_this_id{$id} =
$unique_count_post_q > $rOpts_warn_unique_keys_cutoff;
if ( DEBUG_WUK && defined($id) ) {
my $key = $first_key_by_id{$id};
my $Kfirst = $rhash_key_trove->{$key}->{K};
# TODO: escape $key if it would cause trouble in a .csv file.
# (low priority since this is debug output)
if ( defined($Kfirst) ) {
my $lno = $rLL->[$Kfirst]->[_LINE_INDEX_] + 1;
my $issue_warning =
$unique_count_post_q == 0 ? 'NO'
: $delete_this_id{$id} ? 'NO'
: 'YES';
push @debug_output,
[
$lno, "$id",
"$key", $total_count,
$unique_count_pre_q, $unique_count_post_q,
$issue_warning
];
}
}
}
# locate keys to be deleted
my %mark_as_non_unique;
my %is_dubious_key;
my $dubious_count = 0;
foreach my $key ( keys %{$rhash_key_trove} ) {
my $hash_id = $rhash_key_trove->{$key}->{hash_id};
next if ( !$hash_id );
next if ( !$delete_this_id{$hash_id} );
if ( $dubious_key->($key) ) {
$is_dubious_key{$key} = 1;
$dubious_count++;
}
$mark_as_non_unique{$key} = 1;
}
# Remove keys to be deleted from further consideration
foreach my $key ( keys %mark_as_non_unique ) {
# but keep dubious keys if there is just 1
if ( $is_dubious_key{$key} && $dubious_count == 1 ) { next }
if ( $K_unique_key{$key} ) { delete $K_unique_key{$key} }
}
return if ( !%K_unique_key );
# Check for some keys which are common to a lot of modules
# For example, many modules beginning with 'Test::' have a 'tests' key
foreach my $key ( keys %K_unique_key ) {
my $rmodules = $modules_with_common_keys{$key};
# This is a common key
if ($rmodules) {
foreach my $module ( @{$rmodules} ) {
# If we saw a module which matches the start of the name...
foreach my $module_seen ( keys %saw_use_module ) {
# we can remove it
if ( index( $module_seen, $module ) == 0 ) {
delete $K_unique_key{$key};
last;
}
}
}
}
next if ( !$K_unique_key{$key} );
# Some additional filters when the cutoff is 1
if ( $rOpts_warn_unique_keys_cutoff <= 1 ) {
# Delete key if it is not contained in a list
# i.e. use overload 'xx' => ...
my $hash_id = $rhash_key_trove->{$key}->{hash_id};
if ( !$hash_id || $hash_id eq '1' ) {
delete $K_unique_key{$key};
next;
}
# Delete key if ALL CAPS
if ( $key eq uc($key) ) {
delete $K_unique_key{$key};
next;
}
if ( $key eq 'tests' ) {
$delete_key_if_saw_call->( $key, 'plan' );
next;
}
}
}
if (@debug_output) {
@debug_output = sort { $a->[0] <=> $b->[0] } @debug_output;
print <<EOM;
line,id,first-key,total-count,early-count,late-count,warn?
EOM
foreach my $rvals (@debug_output) {
my $line = join ',', @{$rvals};
print $line, "\n";
}
}
return;
}; ## end $filter_out_large_sets = sub
my $delete_unique_quoted_words = sub {
my ( $rlist, $missing_GetOptions_keys ) = @_;
# Given:
# $rlist = ref to list of words seen in quotes, or a single word
# $missing_GetOptions_keys = true if we saw 'use Getopt::Long'
# but did not see its control hash
# Task:
# remove matches to the current list of unique words
if ( !ref($rlist) ) { $rlist = [$rlist] }
foreach my $word ( @{$rlist} ) {
# remove quotes
if ( $K_unique_key{$word} ) {
delete $K_unique_key{$word};
}
if ( $missing_GetOptions_keys
&& $word !~ /^\w[\w\-]*$/
&& $word !~ /\s/ )
{
$delete_getopt_subword->($word);
}
}
return;
}; ## end $delete_unique_quoted_words = sub
my $is_static_hash_key = sub {
my ($Ktest) = @_;
# Return:
# true if $Ktest is a simple fixed quote-like hash key
# false otherwise
return if ( !defined($Ktest) );
my $type = $rLL->[$Ktest]->[_TYPE_];
# This is just for barewords and quoted text
return unless ( $type eq 'w' || $type eq 'Q' );
# Backup one token at a dashed bareword
if ( @mw_list && $mw_list[-1] eq $Ktest ) { $Ktest -= 1 }
# Now look back for a comma or opening hash brace
$Ktest -= 1;
return if ( $Ktest <= 0 );
$type = $rLL->[$Ktest]->[_TYPE_];
if ( $type eq 'b' ) {
$Ktest--;
$type = $rLL->[$Ktest]->[_TYPE_];
}
if ( $type eq '#' ) {
$Ktest--;
$type = $rLL->[$Ktest]->[_TYPE_];
if ( $type eq 'b' ) {
$Ktest--;
$type = $rLL->[$Ktest]->[_TYPE_];
}
}
if ( $type eq 'L' ) { return 1 }
if ( $type eq ',' ) {
if ( @stack && $stack[-1]->{_slice_name} ) {
return 1;
}
}
return;
}; ## end $is_static_hash_key = sub
# Optimization: we just need to look at these non-blank types
my %is_special_check_type = ( %is_opening_type, %is_closing_type );
@q = qw( => Q q k U w h );
push @q, ',';
@is_special_check_type{@q} = (1) x scalar(@q);
# Values defined during token scan:
my @K_start_qw_list;
my $Getopt_Std_hash_id; # name of option hash for 'use Getopt::Std'
my $ix_HERE_END = -1; # the line index of the last here target read
my @keys_in_HERE_docs;
my @GetOptions_keys;
my $saw_Getopt_Long; # for 'use Getopt::Long'
my $saw_Getopt_Std; # for 'use Getopt::Std'
my %is_GetOptions_call_by_seqno;
my %is_GetOptions_call;
@q = qw( GetOptions GetOptionsFromArray GetOptionsFromString );
@is_GetOptions_call{@q} = (1) x scalar(@q);
#----------------------------------------------------------------
# PHASE 1: loop over all tokens to find hash keys and save quotes
#----------------------------------------------------------------
my $KK = -1;
my $K_end_skip = -1; # allow skipping hash definitions in code sections
my $KK_last_nb; # previous non-blank, non-comment value of $KK
my $type;
while ( ++$KK <= $Klimit ) {
# Skip a blank token
if ( ( $type = $rLL->[$KK]->[_TYPE_] ) eq 'b' ) { next }
# Skip token types which do not need to be examined
elsif ( !$is_special_check_type{$type} ) {
$KK_last_nb = $KK if ( $type ne '#' );
next;
}
#-----------------------------------------------------------
# NOTE: update $KK_last_nb before any 'next' out of the loop
#-----------------------------------------------------------
elsif ( $is_opening_type{$type} ) {
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ( !$seqno ) {
## tokenization error - shouldn't happen
DEVEL_MODE && Fault("no sequence number for type $type\n");
$KK_last_nb = $KK;
next;
}
my $slice_name;
if ( $type eq 'L' ) {
# Skip past something like ${word}
my $type_last =
defined($KK_last_nb)
? $rLL->[$KK_last_nb]->[_TYPE_]
: 'b';
if ( $type_last eq 't' ) {
my $Kc = $K_closing_container->{$seqno};
my $Kn = $self->K_next_code($KK);
$Kn = $self->K_next_code($Kn);
if ( $Kn && $Kc && $Kn == $Kc && $Kc > $K_end_skip ) {
$K_end_skip = $Kc;
}
}
# check for a slice
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ( $rtype_count->{','} ) {
$slice_name = $is_hash_slice->($KK_last_nb);
}
}
push @stack,
{
_seqno => $seqno,
_KK => $KK,
_KK_last_nb => $KK_last_nb,
_slice_name => $slice_name,
};
}
elsif ( $is_closing_type{$type} ) {
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ( !$seqno ) {
## tokenization error - shouldn't happen
DEVEL_MODE && Fault("no sequence number for type $type\n");
$KK_last_nb = $KK;
next;
}
if ( $type eq 'R' ) {
if ( $is_static_hash_key->($KK_last_nb) ) {
$push_KK_last_nb->($KK_last_nb)
if ( $KK_last_nb > $K_end_skip );
}
}
my $item = pop @stack;
if ( !$item || $item->{_seqno} != $seqno ) {
if (DEVEL_MODE) {
# shouldn't happen for a balanced file
my $num = @stack;
my $got = $num ? $item->{_seqno} : 'undef';
my $lno = $rLL->[$KK]->[_LINE_INDEX_];
Fault <<EOM;
stack error at seqno=$seqno type=$type num=$num got seqno=$got lno=$lno
EOM
}
}
}
elsif ( $type eq ',' ) {
# in a slice?
if ( @stack && $stack[-1]->{_slice_name} ) {
if ( $is_static_hash_key->($KK_last_nb) ) {
$push_KK_last_nb->($KK_last_nb)
if ( $KK_last_nb > $K_end_skip );
}
}
}
elsif ( $type eq 'k' ) {
# Look for 'use constant' and define its ending token
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $token eq 'use' || $token eq 'require' ) {
my $Kn = $self->K_next_code($KK);
if ( !defined($Kn) ) {
$KK_last_nb = $KK;
next;
}
my $token_n = $rLL->[$Kn]->[_TOKEN_];
$saw_use_module{$token_n} = $Kn;
# Check for some specific modules
if ( index( $token_n, 'Getopt::Std' ) == 0 ) {
$saw_Getopt_Std = 1;
}
elsif ( index( $token_n, 'Getopt::Long' ) == 0 ) {
$saw_Getopt_Long = 1;
}
elsif ( $token_n eq 'constant' && $token eq 'use' ) {
# Handle 'use constant' ... we will skip these hash keys.
# For example, we do not want to mark '_mode_' and '_uid_'
# here as unique hash keys since they become subs:
# use constant { _mode_ => 2, _uid_ => 4 }
$Kn = $self->K_next_code($Kn);
if ( !defined($Kn) ) {
$KK_last_nb = $KK;
next;
}
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
if ($seqno_n) {
# Set flag to skip over a block of constant definitions.
if ( $rLL->[$Kn]->[_TOKEN_] eq '{' ) {
$K_end_skip = $K_closing_container->{$seqno_n};
}
else {
## unexpected format, skip
}
}
else {
# skip a single constant definition
$K_end_skip = $Kn + 1;
}
}
else {
## not special
}
}
}
elsif ( $type eq 'Q' ) {
# Find the entire range in case of multiline quotes.
my $KK_end_Q = $KK;
while ($KK_end_Q < $Klimit
&& $rLL->[ $KK_end_Q + 1 ]->[_TYPE_] eq 'Q' )
{
$KK_end_Q++;
}
# Save for later comparison with hash keys.
my $seqno_Q = @stack ? $stack[-1]->{_seqno} : undef;
push @Q_list, [ $KK, $KK_end_Q, $seqno_Q ];
# Move loop index to the end of this quote
$KK = $KK_end_Q;
}
elsif ( $type eq 'q' ) {
if ( !defined($KK_last_nb)
|| $rLL->[$KK_last_nb]->[_TYPE_] ne 'q' )
{
push @K_start_qw_list, $KK;
}
}
elsif ( $type eq 'U' || $type eq 'w' ) {
# 'GetOptions(' will marked be type 'U'
# 'GetOptions (' will be marked type 'w' # has space '('
my $token = $rLL->[$KK]->[_TOKEN_];
# Look GetOptions call (Getopt::Long, for example:
# GetOptions ("length=i" => \$length,
# "file=s" => \$data)
if ( $is_GetOptions_call{$token} ) {
my $Kn = $self->K_next_nonblank($KK);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '(' ) {
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
$is_GetOptions_call_by_seqno{$seqno_n} = 1;
}
}
# Look getopts call (Getopt::Std), for example:
# getopts('oif:')
# getopts('oif:', \my %opts);
# getopt('oDI:', \my %opts);
elsif ( $token eq 'getopts' || $token eq 'getopt' ) {
my $Kn = $self->K_next_nonblank($KK);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '(' ) {
# Look for the first arg as a quoted string
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
$Kn = $self->K_next_nonblank($Kn);
if ( $Kn && $rLL->[$Kn]->[_TYPE_] eq 'Q' ) {
push @Q_getopts, $Kn;
}
# Look for hash name if two-arg call
$Kn = $self->K_next_nonblank($Kn);
if ( $Kn && $rLL->[$Kn]->[_TYPE_] eq ',' ) {
my $Kc = $K_closing_container->{$seqno_n};
$Kn = $self->K_previous_code($Kc);
my $id = $rLL->[$Kn]->[_TOKEN_];
$id =~ s/^\%/\$/;
$Getopt_Std_hash_id = $id;
}
}
}
# check for -word
elsif ($KK > 0
&& $rLL->[ $KK - 1 ]->[_TOKEN_] eq '-'
&& $rLL->[ $KK - 1 ]->[_TYPE_] eq 'm' )
{
push @mw_list, $KK;
}
else {
## no other special checks
}
}
elsif ( $type eq '=>' ) {
my $parent_seqno = $self->parent_seqno_by_K($KK);
if ( $is_GetOptions_call_by_seqno{$parent_seqno} ) {
push @GetOptions_keys, $KK_last_nb;
}
else {
$push_KK_last_nb->( $KK_last_nb, $parent_seqno )
if ( $KK_last_nb > $K_end_skip );
}
}
# a here doc - look for interpolated hash keys
elsif ( $type eq 'h' ) {
my $ix_line = $rLL->[$KK]->[_LINE_INDEX_];
my $ix_HERE = max( $ix_HERE_END, $ix_line );
# collect the here doc text
( $ix_HERE_END, my $here_text ) = $self->get_here_text($ix_HERE);
# Any found keys are saved for checking against keys found
# in the text, but they are not entered as candidates for
# unique keys.
my $token = $rLL->[$KK]->[_TOKEN_];
if ( is_interpolated_here_doc($token) ) {
my $rkeys = get_interpolated_hash_keys($here_text);
push @keys_in_HERE_docs, @{$rkeys};
}
}
else {
DEVEL_MODE && Fault("missing code for type $type\n");
}
$KK_last_nb = $KK;
} ## end while ( ++$KK <= $Klimit )
# Make a list of keys known to any modules which have been seen
$set_known_module_keys->();
my $missing_GetOptions_keys =
$saw_Getopt_Long
&& %is_GetOptions_call_by_seqno
&& !@GetOptions_keys;
#----------------------------------------------------
# PHASE 2: remove unique keys which match quoted text
#----------------------------------------------------
# Find hash keys seen just one time
foreach my $key ( keys %{$rhash_key_trove} ) {
my $count = $rhash_key_trove->{$key}->{count};
next if ( $count != 1 );
# Filter out some known keys
if ( $is_known_key->($key) ) {
$rhash_key_trove->{$key}->{is_known} = 1;
next;
}
my $K = $rhash_key_trove->{$key}->{K};
$K_unique_key{$key} = $K;
}
return if ( !%K_unique_key );
# Now go back and look for these keys in any saved quotes ...
# Check each unique word against the list of type Q tokens
if (@Q_list) {
my $imax = $#Q_list;
foreach my $i ( 0 .. $imax ) {
my ( $K, $Kend, $seqno_Q ) = @{ $Q_list[$i] };
my $string = $rLL->[$K]->[_TOKEN_];
# Determine the quote type from its leading characters.
# Note that tokens in a qwaf call are not contained in quote marks.
my $is_qwaf_Q = defined($seqno_Q) && $ris_qwaf_by_seqno->{$seqno_Q};
my $ib = 0;
my $is_interpolated = 0;
if ( !$is_qwaf_Q ) {
next if ( length($string) < 2 );
my $ch1 = substr( $string, 0, 1 );
if ( $ch1 eq '"' ) {
$ib = 1;
$is_interpolated = 1;
}
elsif ( $ch1 eq "'" ) {
$ib = 1;
$is_interpolated = 0;
}
else {
my $rQ_info = Q_spy($string);
next if ( !defined($rQ_info) );
$ib = $rQ_info->{nch};
$is_interpolated = $rQ_info->{is_interpolated};
}
}
my $is_multiline = 0;
if ( $Kend > $K ) {
$is_multiline = 1;
foreach my $Kx ( $K + 1 .. $Kend ) {
$string .= $rLL->[$Kx]->[_TOKEN_];
}
}
# Strip off leading and ending quote characters.
# Note: we do not need to be precise on removing ending characters
# in this case.
my $word = $is_qwaf_Q ? $string : substr( $string, $ib, -1 );
if ($is_interpolated) {
my $rkeys = get_interpolated_hash_keys($word);
foreach my $key ( @{$rkeys} ) {
if ( $K_unique_key{$key} ) {
delete $K_unique_key{$key};
}
}
}
# Ignore multiline quotes for the remaining checks
if ( !$is_multiline ) {
$delete_unique_quoted_words->( $word,
$missing_GetOptions_keys );
}
}
}
return if ( !%K_unique_key );
# Check list of barewords quoted with a leading dash
if (@mw_list) {
foreach my $Kmw (@mw_list) {
my $word = '-' . $rLL->[$Kmw]->[_TOKEN_];
if ( $K_unique_key{$word} ) {
delete $K_unique_key{$word};
}
}
}
return if ( !%K_unique_key );
# Check words against any hash keys in here docs
foreach my $key (@keys_in_HERE_docs) {
if ( $K_unique_key{$key} ) {
delete $K_unique_key{$key};
}
}
return if ( !%K_unique_key );
# Check words against any option keys passed to GetOptions
foreach my $Kopt (@GetOptions_keys) {
my $word = $rLL->[$Kopt]->[_TOKEN_];
my $ch1 = substr( $word, 0, 1 );
if ( $ch1 eq "'" || $ch1 eq '"' ) {
$word = substr( $word, 1, -1 );
}
if ( $K_unique_key{$word} ) {
delete $K_unique_key{$word};
}
# remove any optional flag and retry
if ( $word !~ /^\w[\w\-]*$/
&& $word !~ /\s/ )
{
$delete_getopt_subword->($word);
}
}
return if ( !%K_unique_key );
# For two-arg call to Getopt::Std ...
if ( $Getopt_Std_hash_id && $saw_Getopt_Std ) {
# If we managed to read the first arg..remove single letters seen
foreach my $Kopt (@Q_getopts) {
my $word = $rLL->[$Kopt]->[_TOKEN_];
my $ch1 = substr( $word, 0, 1 );
if ( $ch1 eq "'" || $ch1 eq '"' ) {
$word = substr( $word, 1, -1 );
}
$word =~ s/://g;
my @letters = split //, $word;
foreach my $letter (@letters) {
if ( $K_unique_key{$letter} ) {
delete $K_unique_key{$letter};
}
}
}
# If we found a getopts hash name but did not read the first string,
# remove all single-character keys in that hash name (typically $opt)
if ( !@Q_getopts ) {
foreach my $key ( keys %K_unique_key ) {
next if ( length($key) != 1 );
next if ( $key !~ /[A-Za-z\?]/ );
# For now, delete any single letter key.
# The hash name can become a ref with different name
# through sub calls.
##my $hash_id = $rhash_key_trove->{$key}->{hash_id};
##if ( $hash_id && $hash_id eq $Getopt_Std_hash_id ) {
delete $K_unique_key{$key};
##}
}
}
}
return if ( !%K_unique_key );
# Remove any keys which are also in a qw list
foreach my $Kqw (@K_start_qw_list) {
my ( $K_last_q_uu, $rlist ) = $self->get_qw_list($Kqw);
if ( !defined($rlist) ) {
## shouldn't happen: must be a bad index $Kqw in @K_start_qw_list
my ( $lno, $type_qw, $token_qw ) = qw ( undef undef undef );
if ( defined($Kqw)
&& $Kqw >= 0
&& $Kqw <= $Klimit )
{
$lno = $rLL->[$Kqw]->[_LINE_INDEX_] + 1;
$type_qw = $rLL->[$Kqw]->[_TYPE_];
$token_qw = $rLL->[$Kqw]->[_TOKEN_];
}
DEVEL_MODE
&& Fault(
"$lno: Empty return for K=$Kqw type='$type_qw' token='$token_qw'\n"
);
next;
}
$delete_unique_quoted_words->( $rlist, $missing_GetOptions_keys );
}
return if ( !%K_unique_key );
#------------------------------------------------------------------
# PHASE 3: filter out multiple related keys which are mostly unique
#------------------------------------------------------------------
$filter_out_large_sets->();
return if ( !%K_unique_key );
#-------------------------------------------
# PHASE 4: Report any remaining unique words
#-------------------------------------------
my $output_string = EMPTY_STRING;
my @list;
foreach my $word ( keys %K_unique_key ) {
my $K = $K_unique_key{$word};
my $lno = $rLL->[$K]->[_LINE_INDEX_] + 1;
push @list, [ $word, $lno ];
}
@list = sort { $a->[1] <=> $b->[1] || $a->[0] cmp $b->[0] } @list;
foreach my $item (@list) {
my ( $word, $lno ) = @{$item};
$output_string .= "$word,$lno\n";
}
return $output_string;
} ## end sub scan_unique_keys
sub dump_unique_keys {
my ($self) = @_;
# Dump a list of hash keys used just one time to STDOUT
# This sub is called when
# --dump-unique-keys (-duk) is set.
my $output_string = $self->scan_unique_keys();
if ($output_string) {
my $input_stream_name = get_input_stream_name();
chomp $output_string;
print {*STDOUT} <<EOM;
==> $input_stream_name <==
$output_string
EOM
}
return;
} ## end sub dump_unique_keys
sub warn_unique_keys {
my ($self) = @_;
# process a --warn-unique-keys command
my $wuk_key = 'warn-unique-keys';
my $wukc_key = 'warn-unique-keys-cutoff';
my $output_string = $self->scan_unique_keys();
if ($output_string) {
my $message =
"Begin scan for --$wuk_key using --$wukc_key=$rOpts_warn_unique_keys_cutoff\n";
$message .= $output_string;
$message .= "End scan for --$wuk_key\n";
warning($message);
}
return;
} ## end sub warn_unique_keys
sub dump_block_summary {
my ($self) = @_;
# Dump information about selected code blocks to STDOUT
# This sub is called when
# --dump-block-summary (-dbs) is set.
# The following controls are available:
# --dump-block-types=s (-dbt=s), where s is a list of block types
# (if else elsif for foreach while do ... sub) ; default is 'sub'
# --dump-block-minimum-lines=n (-dbml=n), where n is the minimum
# number of lines for a block to be included; default is 20.
my $rOpts_dump_block_types = $rOpts->{'dump-block-types'};
if ( !defined($rOpts_dump_block_types) ) { $rOpts_dump_block_types = 'sub' }
$rOpts_dump_block_types =~ s/^\s+//;
$rOpts_dump_block_types =~ s/\s+$//;
my @list = split /\s+/, $rOpts_dump_block_types;
my %dump_block_types;
@dump_block_types{@list} = (1) x scalar(@list);
# Get level variation info for code blocks
my $rlevel_info = $self->find_level_info();
# Get block info
my $rselected_blocks =
$self->find_selected_blocks( \%dump_block_types, $rlevel_info );
# Get if-chains
my $rselected_if_chains =
$self->find_if_chains( \%dump_block_types, $rlevel_info );
# Get package info
my $rpackages = $self->find_selected_packages( \%dump_block_types );
# merge
my @all_blocks =
( @{$rselected_blocks}, @{$rselected_if_chains}, @{$rpackages} );
return unless (@all_blocks);
my $input_stream_name = get_input_stream_name();
# Get code line count
my $rcode_line_count = $self->find_code_line_count();
# Get mccabe count
my $rmccabe_count_sum = $self->find_mccabe_count();
my $rOpts_dump_block_minimum_lines = $rOpts->{'dump-block-minimum-lines'};
if ( !defined($rOpts_dump_block_minimum_lines) ) {
$rOpts_dump_block_minimum_lines = 20;
}
my $rLL = $self->[_rLL_];
# add various counts, filter and print to STDOUT
my $routput_lines = [];
foreach my $item (@all_blocks) {
my $K_opening = $item->{K_opening};
my $K_closing = $item->{K_closing};
# define total number of lines
my $lx_open = $rLL->[$K_opening]->[_LINE_INDEX_];
my $lx_close = $rLL->[$K_closing]->[_LINE_INDEX_];
my $line_count = $lx_close - $lx_open + 1;
# define total number of lines of code excluding blanks, comments, pod
my $code_lines_open = $rcode_line_count->[$lx_open];
my $code_lines_close = $rcode_line_count->[$lx_close];
my $code_lines = 0;
if ( defined($code_lines_open) && defined($code_lines_close) ) {
$code_lines = $code_lines_close - $code_lines_open + 1;
}
# filter out blocks below the selected code line limit
if ( $code_lines < $rOpts_dump_block_minimum_lines ) {
next;
}
# add mccabe_count for this block
my $mccabe_closing = $rmccabe_count_sum->{ $K_closing + 1 };
my $mccabe_opening = $rmccabe_count_sum->{$K_opening};
my $mccabe_count = 1; # add 1 to match Perl::Critic
if ( defined($mccabe_opening) && defined($mccabe_closing) ) {
$mccabe_count += $mccabe_closing - $mccabe_opening;
}
# Store the final set of print variables
# Note: K_opening is added for sorting but deleted before printing
push @{$routput_lines}, [
$input_stream_name,
$item->{line_start},
$line_count,
$code_lines,
$item->{type},
$item->{name},
$item->{level},
$item->{max_change},
$item->{block_count},
$mccabe_count,
$K_opening,
];
}
return unless @{$routput_lines};
# Sort blocks and packages on starting line number
my @sorted_lines = sort { $a->[-1] <=> $b->[-1] } @{$routput_lines};
print {*STDOUT}
"file,line,line_count,code_lines,type,name,level,max_change,block_count,mccabe_count\n";
foreach my $rline_vars (@sorted_lines) {
# remove K_opening which was added for stable sorting
pop @{$rline_vars};
my $line = join( ",", @{$rline_vars} ) . "\n";
print {*STDOUT} $line;
}
return;
} ## end sub dump_block_summary
sub set_ci {
my ($self) = @_;
# Set the basic continuation indentation (ci) for all tokens.
# This is a replacement for the values previously computed in
# sub Perl::Tidy::Tokenizer::tokenizer_wrapup. In most cases it
# produces identical results, but in a few cases it is an improvement.
use constant DEBUG_SET_CI => 0;
# This turns on an optional piece of logic which makes the new and
# old computations of ci agree. It has almost no effect on actual
# programs but is useful for testing.
use constant SET_CI_OPTION_0 => 1;
# This is slightly different from the hash in in break_lists
# with a similar name (removed '?' and ':' to fix t007 and others)
my %is_logical_container_for_ci;
my @q = qw# if elsif unless while and or err not && | || ! #;
@is_logical_container_for_ci{@q} = (1) x scalar(@q);
# This is slightly different from a tokenizer hash with a similar name:
my %is_container_label_type_for_ci;
@q = qw# k && | || ? : ! #;
@is_container_label_type_for_ci{@q} = (1) x scalar(@q);
# Undo ci of closing list paren followed by these binary operators:
# - initially defined for issue t027, then
# - added '=' for t015
# - added '=~' for 'locale.in'
# - added '<=>' for 'corelist.in'
# Note:
# See @value_requestor_type for more that might be included
# See also @is_binary_type
my %bin_op_type;
@q = qw# . ** -> + - / * = != ^ < > % >= <= =~ !~ <=> x #;
@bin_op_type{@q} = (1) x scalar(@q);
my %is_list_end_type;
@q = qw( ; { } );
push @q, ',';
@is_list_end_type{@q} = (1) x scalar(@q);
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
return unless defined($Klimit);
my $token = ';';
my $type = ';';
my $last_token = $token;
my $last_type = $type;
my $ci_last = 0;
my $ci_next = 0;
my $ci_next_next = 1;
my $rstack = [];
my $seq_root = SEQ_ROOT;
my $rparent = {
_seqno => $seq_root,
_ci_open => 0,
_ci_open_next => 0,
_ci_close => 0,
_ci_close_next => 0,
_container_type => 'Block',
_ci_next_next => $ci_next_next,
_comma_count => 0,
_semicolon_count => 0,
_Kc => undef,
};
# Debug stuff
my @debug_lines;
my %saw_ci_diff;
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_sub_block = $self->[_ris_sub_block_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $K_closing_ternary = $self->[_K_closing_ternary_];
my $rlines = $self->[_rlines_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $want_break_before_comma = $want_break_before{','};
my $map_block_follows = sub {
# return true if a sort/map/etc block follows the closing brace
# of container $seqno
my ($seqno) = @_;
my $Kc = $K_closing_container->{$seqno};
return unless defined($Kc);
# Skip past keyword
my $Kcn = $self->K_next_code($Kc);
return unless defined($Kcn);
my $seqno_n = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
return if ($seqno_n);
# Look for opening block brace
my $Knn = $self->K_next_code($Kcn);
return unless defined($Knn);
my $seqno_nn = $rLL->[$Knn]->[_TYPE_SEQUENCE_];
return unless ($seqno_nn);
my $K_nno = $K_opening_container->{$seqno_nn};
return unless ( $K_nno && $K_nno == $Knn );
my $block_type = $rblock_type_of_seqno->{$seqno_nn};
if ($block_type) {
return $is_block_with_ci{$block_type};
}
return;
}; ## end $map_block_follows = sub
my $redo_preceding_comment_ci = sub {
# We need to reset the ci of the previous comment(s)
my ( $K, $ci ) = @_;
my $Km = $self->K_previous_code($K);
return if ( !defined($Km) );
foreach my $Kt ( $Km + 1 .. $K - 1 ) {
if ( $rLL->[$Kt]->[_TYPE_] eq '#' ) {
$rLL->[$Kt]->[_CI_LEVEL_] = $ci;
}
}
return;
}; ## end $redo_preceding_comment_ci = sub
# Definitions of the sequence of ci_values being maintained:
# $ci_last = the ci value of the previous non-blank, non-comment token
# $ci_this = the ci value to be stored for this token at index $KK
# $ci_next = the normal ci for the next token, set by the previous tok
# $ci_next_next = the normal next value of $ci_next in this container
#--------------------------
# Main loop over all tokens
#--------------------------
my $KK = -1;
foreach my $rtoken_K ( @{$rLL} ) {
$KK++;
#------------------
# Section 1. Blanks
#------------------
if ( ( $type = $rtoken_K->[_TYPE_] ) eq 'b' ) {
$rtoken_K->[_CI_LEVEL_] = $ci_next;
# 'next' to avoid saving last_ values for blanks and commas
next;
}
#--------------------
# Section 2. Comments
#--------------------
if ( $type eq '#' ) {
my $ci_this = $ci_next;
# If at '#' in ternary before a ? or :, use that level to make
# the comment line up with the next ? or : line. (see c202/t052)
# i.e. if a nested ? follows, we increase the '#' level by 1, and
# if a nested : follows, we decrease the '#' level by 1.
# This is the only place where this sub changes a _LEVEL_ value.
my $Kn;
my $parent_container_type = $rparent->{_container_type};
if ( $parent_container_type eq 'Ternary' ) {
$Kn = $self->K_next_code($KK);
if ($Kn) {
my $type_kn = $rLL->[$Kn]->[_TYPE_];
if ( $is_ternary{$type_kn} ) {
$rLL->[$KK]->[_LEVEL_] = $rLL->[$Kn]->[_LEVEL_];
# and use the ci of a terminating ':'
if ( $Kn == $rparent->{_Kc} ) {
$ci_this = $rparent->{_ci_close};
}
}
}
}
# Undo ci for a block comment followed by a closing token or , or ;
# provided that the parent container:
# - ends without ci, or
# - starts ci=0 and is a comma list or this follows a closing type
# - has a level jump
if (
$ci_this
&& (
!$rparent->{_ci_close}
|| (
!$rparent->{_ci_open_next}
&& ( ( $rparent->{_comma_count} || $last_type eq ',' )
|| $is_closing_type{$last_type} )
)
)
)
{
# Be sure this is a block comment
my $lx = $rtoken_K->[_LINE_INDEX_];
my $rK_range = $rlines->[$lx]->{_rK_range};
my $Kfirst;
if ($rK_range) { $Kfirst = $rK_range->[0] }
if ( defined($Kfirst) && $Kfirst == $KK ) {
# Look for trailing closing token
# [ and possibly ',' or ';' ]
$Kn = $self->K_next_code($KK) if ( !$Kn );
my $Kc = $rparent->{_Kc};
if (
$Kn
&& $Kc
&& (
$Kn == $Kc
# only look for comma if -wbb=',' is set
# to minimize changes to existing formatting
|| ( $rLL->[$Kn]->[_TYPE_] eq ','
&& $want_break_before_comma
&& $parent_container_type eq 'List' )
# do not look ahead for a bare ';' because
# it changes old formatting with little benefit.
## || ( $rLL->[$Kn]->[_TYPE_] eq ';'
## && $parent_container_type eq 'Block' )
)
)
{
# Be sure container has a level jump
my $level_KK = $rLL->[$KK]->[_LEVEL_];
my $level_Kc = $rLL->[$Kc]->[_LEVEL_];
# And be sure this is not a hanging side comment
my $CODE_type = $rlines->[$lx]->{_code_type};
my $is_HSC = $CODE_type && $CODE_type eq 'HSC';
if ( $level_Kc < $level_KK && !$is_HSC ) {
$ci_this = 0;
}
}
}
}
$ci_next = $ci_this;
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# 'next' to avoid saving last_ values for blanks and commas
next;
}
#------------------------------------------------------------
# Section 3. Continuing with non-blank and non-comment tokens
#------------------------------------------------------------
$token = $rtoken_K->[_TOKEN_];
# Set ci values appropriate for most tokens:
my $ci_this = $ci_next;
$ci_next = $ci_next_next;
# Now change these ci values as necessary for special cases...
#----------------------------
# Section 4. Container tokens
#----------------------------
if ( $rtoken_K->[_TYPE_SEQUENCE_] ) {
my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
#-------------------------------------
# Section 4.1 Opening container tokens
#-------------------------------------
if ( $is_opening_sequence_token{$token} ) {
my $level = $rtoken_K->[_LEVEL_];
# Default ci values for the closing token, to be modified
# as necessary:
my $ci_close = $ci_next;
my $ci_close_next = $ci_next_next;
my $Kc =
$type eq '?'
? $K_closing_ternary->{$seqno}
: $K_closing_container->{$seqno};
# $Kn = $self->K_next_nonblank($KK);
my $Kn;
if ( $KK < $Klimit ) {
$Kn = $KK + 1;
if ( $rLL->[$Kn]->[_TYPE_] eq 'b' && $Kn < $Klimit ) {
$Kn += 1;
}
}
# $Kcn = $self->K_next_code($Kc);
my $Kcn;
if ( $Kc && $Kc < $Klimit ) {
$Kcn = $Kc + 1;
if ( $rLL->[$Kcn]->[_TYPE_] eq 'b' && $Kcn < $Klimit ) {
$Kcn += 1;
}
if ( $rLL->[$Kcn]->[_TYPE_] eq '#' ) {
$Kcn = $self->K_next_code($Kcn);
}
}
my $opening_level_jump =
$Kn ? $rLL->[$Kn]->[_LEVEL_] - $level : 0;
# initialize ci_next_next to its standard value
$ci_next_next = 1;
# Default: ci of first item of list with level jump is same as
# ci of first item of container
if ( $opening_level_jump > 0 ) {
$ci_next = $rparent->{_ci_open_next};
}
my ( $comma_count, $semicolon_count );
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ($rtype_count) {
$comma_count = $rtype_count->{','};
$semicolon_count = $rtype_count->{';'};
# Do not include a terminal semicolon in the count (the
# comma_count has already been corrected by respace_tokens)
# We only need to know if there are semicolons or not, so
# for speed we can just do this test if the count is 1.
if ( $semicolon_count && $semicolon_count == 1 ) {
my $Kcm = $self->K_previous_code($Kc);
if ( $rLL->[$Kcm]->[_TYPE_] eq ';' ) {
$semicolon_count--;
}
}
}
my $container_type;
#-------------------------
# Section 4.1.1 Code Block
#-------------------------
my $block_type = $rblock_type_of_seqno->{$seqno};
if ($block_type) {
$container_type = 'Block';
# set default depending on block type
$ci_close = 0;
my $no_semicolon =
$is_block_without_semicolon{$block_type}
|| $ris_sub_block->{$seqno}
|| $last_type eq 'J';
if ( !$no_semicolon ) {
# Optional fix for block types sort/map/etc which use
# zero ci at terminal brace if previous keyword had
# zero ci. This will cause sort/map/grep filter blocks
# to line up. Note that sub 'undo_ci' will also try to
# do this, so this is not a critical operation.
if ( $is_block_with_ci{$block_type} ) {
my $parent_seqno = $rparent->{_seqno};
if (
# only do this within containers
$parent_seqno != SEQ_ROOT
# only in containers without ',' and ';'
&& !$rparent->{_comma_count}
&& !$rparent->{_semicolon_count}
&& $map_block_follows->($seqno)
)
{
if ($ci_last) {
$ci_close = $ci_this;
}
}
else {
$ci_close = $ci_this;
}
}
# keep ci if certain operators follow (fix c202/t024)
if ( !$ci_close && $Kcn ) {
my $type_kcn = $rLL->[$Kcn]->[_TYPE_];
my $token_kcn = $rLL->[$Kcn]->[_TOKEN_];
if ( $type_kcn =~ /^(\.|\&\&|\|\|)$/
|| $type_kcn eq 'k' && $is_and_or{$token_kcn} )
{
$ci_close = $ci_this;
}
}
}
if ( $rparent->{_container_type} ne 'Ternary' ) {
$ci_this = 0;
}
$ci_next = 0;
$ci_close_next = $ci_close;
}
#----------------------
# Section 4.1.2 Ternary
#----------------------
elsif ( $type eq '?' ) {
$container_type = 'Ternary';
if ( $rparent->{_container_type} eq 'List'
&& !$rparent->{_ci_open_next} )
{
$ci_this = 0;
$ci_close = 0;
}
# redo ci of any preceding comments if necessary
# at an outermost ? (which has no level jump)
if ( !$opening_level_jump ) {
$redo_preceding_comment_ci->( $KK, $ci_this );
}
}
#-------------------------------
# Section 4.1.3 Logical or List?
#-------------------------------
else {
my $is_logical = $is_container_label_type_for_ci{$last_type}
&& $is_logical_container_for_ci{$last_token}
# Part 1 of optional patch to get agreement with previous
# ci This makes almost no difference in a typical program
# because we will seldom break within an array index.
|| $type eq '[' && SET_CI_OPTION_0;
if ( !$is_logical && $token eq '(' ) {
# 'foreach' and 'for' paren contents are treated as
# logical except for C-style 'for'
if ( $last_type eq 'k' ) {
$is_logical ||= $last_token eq 'foreach';
# C-style 'for' container will be type 'List'
if ( $last_token eq 'for' ) {
$is_logical =
!( $rtype_count && $rtype_count->{'f'} );
}
}
# Check for 'for' and 'foreach' loops with iterators
elsif ( $last_type eq 'i' && defined($Kcn) ) {
my $seqno_kcn = $rLL->[$Kcn]->[_TYPE_SEQUENCE_];
my $type_kcn = $rLL->[$Kcn]->[_TOKEN_];
if ( $seqno_kcn && $type_kcn eq '{' ) {
my $block_type_kcn =
$rblock_type_of_seqno->{$seqno_kcn};
$is_logical ||= $block_type_kcn
&& ( $block_type_kcn eq 'for'
|| $block_type_kcn eq 'foreach' );
}
# Search backwards for 'for'/'foreach' with
# iterator in case user is running from an editor
# and did not include the block (fixes case
# 'xci.in').
my $Km = $self->K_previous_code($KK);
foreach ( 0 .. 2 ) {
$Km = $self->K_previous_code($Km);
last unless defined($Km);
last unless ( $rLL->[$Km]->[_TYPE_] eq 'k' );
my $tok = $rLL->[$Km]->[_TOKEN_];
next if ( $tok eq 'my' );
$is_logical ||=
( $tok eq 'for' || $tok eq 'foreach' );
last;
}
}
elsif ( $last_token eq '(' ) {
$is_logical ||=
$rparent->{_container_type} eq 'Logical';
}
else {
# does not look like a logical paren
}
}
#------------------------
# Section 4.1.3.1 Logical
#------------------------
if ($is_logical) {
$container_type = 'Logical';
# Pass ci though an '!'
if ( $last_type eq '!' ) { $ci_this = $ci_last }
$ci_next_next = 0;
$ci_close_next = $ci_this;
# Part 2 of optional patch to get agreement with
# previous ci
if ( $type eq '[' && SET_CI_OPTION_0 ) {
$ci_next_next = $ci_this;
# Undo ci at a chain of indexes or hash keys
if ( $last_type eq '}' ) {
$ci_this = $ci_last;
}
}
if ($opening_level_jump) {
$ci_next = 0;
}
}
#---------------------
# Section 4.1.3.2 List
#---------------------
else {
# Here 'List' is a catchall for none of the above types
$container_type = 'List';
# lists in blocks ...
if ( $rparent->{_container_type} eq 'Block' ) {
# undo ci if another closing token follows
if ( defined($Kcn) ) {
my $closing_level_jump =
$rLL->[$Kcn]->[_LEVEL_] - $level;
if ( $closing_level_jump < 0 ) {
$ci_close = $ci_this;
}
}
}
# lists not in blocks ...
else {
if ( !$rparent->{_comma_count} ) {
$ci_close = $ci_this;
# undo ci at binary op after right paren if no
# commas in container; fixes t027, t028
if ( $ci_close_next != $ci_close
&& defined($Kcn)
&& $bin_op_type{ $rLL->[$Kcn]->[_TYPE_] } )
{
$ci_close_next = $ci_close;
}
}
if ( $rparent->{_container_type} eq 'Ternary' ) {
$ci_next = 0;
}
}
# Undo ci at a chain of indexes or hash keys
if ( $token ne '(' && $last_type eq '}' ) {
$ci_this = $ci_close = $ci_last;
}
}
}
#---------------------------------------
# Section 4.1.4 Store opening token info
#---------------------------------------
# Most closing tokens should align with their opening tokens.
if (
$type eq '{'
&& $token ne '('
&& $is_list_end_type{$last_type}
# avoid asub blocks, which may have prototypes ending in '}'
&& !$ris_asub_block->{$seqno}
)
{
$ci_close = $ci_this;
}
# Closing ci must never be less than opening
if ( $ci_close < $ci_this ) { $ci_close = $ci_this }
push @{$rstack}, $rparent;
$rparent = {
_seqno => $seqno,
_container_type => $container_type,
_ci_next_next => $ci_next_next,
_ci_open => $ci_this,
_ci_open_next => $ci_next,
_ci_close => $ci_close,
_ci_close_next => $ci_close_next,
_comma_count => $comma_count,
_semicolon_count => $semicolon_count,
_Kc => $Kc,
};
}
#-------------------------------------
# Section 4.2 Closing container tokens
#-------------------------------------
else {
my $seqno_test = $rparent->{_seqno};
if ( $seqno_test ne $seqno ) {
# Shouldn't happen if we are processing balanced text.
# (Unbalanced text should go out verbatim)
DEVEL_MODE
&& Fault("stack error: $seqno_test != $seqno\n");
}
# Use ci_this, ci_next values set by the matching opening token:
$ci_this = $rparent->{_ci_close};
$ci_next = $rparent->{_ci_close_next};
my $ci_open_old = $rparent->{_ci_open};
# Then pop the stack and use the parent ci_next_next value:
if ( @{$rstack} ) {
$rparent = pop @{$rstack};
$ci_next_next = $rparent->{_ci_next_next};
}
else {
# Shouldn't happen if we are processing balanced text.
DEVEL_MODE && Fault("empty stack - shouldn't happen\n");
}
# Fix: undo ci at a closing token followed by a closing token.
# Goal is to keep formatting independent of the existence of a
# trailing comma or semicolon.
if ( $ci_this > 0 && !$ci_open_old && !$rparent->{_ci_close} ) {
my $Kc = $rparent->{_Kc};
my $Kn = $self->K_next_code($KK);
if ( $Kc && $Kn && $Kc == $Kn ) {
$ci_this = $ci_next = 0;
}
}
}
}
#---------------------------------
# Section 5. Semicolons and Labels
#---------------------------------
# The next token after a ';' and label (type 'J') starts a new stmt
# The ci after a C-style for ';' (type 'f') is handled similarly.
elsif ( $type eq ';' || $type eq 'J' || $type eq 'f' ) {
$ci_next = 0;
if ( $is_closing_type{$last_type} ) { $ci_this = $ci_last }
}
#--------------------
# Section 6. Keywords
#--------------------
# Undo ci after a format statement
elsif ( $type eq 'k' ) {
if ( substr( $token, 0, 6 ) eq 'format' ) {
$ci_next = 0;
}
}
#------------------
# Section 7. Commas
#------------------
# A comma and the subsequent item normally have ci undone
# unless ci has been set at a lower level
elsif ( $type eq ',' ) {
if ( $rparent->{_container_type} eq 'List' ) {
$ci_this = $ci_next = $rparent->{_ci_open_next};
}
}
else {
# not a special ci type
}
# Save debug info if requested
DEBUG_SET_CI && do {
my $seqno = $rtoken_K->[_TYPE_SEQUENCE_];
my $level = $rtoken_K->[_LEVEL_];
my $ci = $rtoken_K->[_CI_LEVEL_];
if ( $ci > 1 ) { $ci = 1 }
my $tok = $token;
my $last_tok = $last_token;
$tok =~ s/\t//g;
$last_tok =~ s/\t//g;
$tok = length($tok) > 3 ? substr( $tok, 0, 8 ) : $tok;
$last_tok =
length($last_tok) > 3 ? substr( $last_tok, 0, 8 ) : $last_tok;
$tok =~ s/["']//g;
$last_tok =~ s/["']//g;
my $block_type;
$block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
$block_type = EMPTY_STRING unless ($block_type);
my $ptype = $rparent->{_container_type};
my $pname = $ptype;
my $error =
$ci_this == $ci ? EMPTY_STRING : $type eq 'b' ? "error" : "ERROR";
if ($error) { $saw_ci_diff{$KK} = 1 }
my $lno = $rtoken_K->[_LINE_INDEX_] + 1;
$debug_lines[$KK] = <<EOM;
$lno\t$ci\t$ci_this\t$ci_next\t$last_type\t$last_tok\t$type\t$tok\t$seqno\t$level\t$pname\t$block_type\t$error
EOM
};
#----------------------------------
# Store the ci value for this token
#----------------------------------
$rtoken_K->[_CI_LEVEL_] = $ci_this;
# Remember last nonblank, non-comment token info for the next pass
$ci_last = $ci_this;
$last_token = $token;
$last_type = $type;
} ## End main loop over tokens
#----------------------
# Post-loop operations:
#----------------------
if (DEBUG_SET_CI) {
my @output_lines;
foreach my $Kd ( 0 .. $Klimit ) {
my $line = $debug_lines[$Kd];
if ($line) {
my $Kp = $self->K_previous_code($Kd);
my $Kn = $self->K_next_code($Kd);
if ( DEBUG_SET_CI > 1
|| $Kp && $saw_ci_diff{$Kp}
|| $saw_ci_diff{$Kd}
|| $Kn && $saw_ci_diff{$Kn} )
{
push @output_lines, $line;
}
}
}
if (@output_lines) {
unshift @output_lines, <<EOM;
lno\tci\tci_this\tci_next\tlast_type\tlast_tok\ttype\ttok\tseqno\tlevel\tpname\tblock_type\terror?
EOM
foreach my $line (@output_lines) {
chomp $line;
print {*STDOUT} $line, "\n";
}
}
}
return;
} ## end sub set_ci
sub set_CODE_type {
my ($self) = @_;
# Examine each line of code and set a flag '$CODE_type' to describe it.
# Also return a list of lines with side comments.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rOpts_format_skipping_begin = $rOpts->{'format-skipping-begin'};
my $rOpts_format_skipping_end = $rOpts->{'format-skipping-end'};
my $rOpts_static_block_comment_prefix =
$rOpts->{'static-block-comment-prefix'};
# Remember indexes of lines with side comments
my @ix_side_comments;
my $In_format_skipping_section = 0;
my $Saw_VERSION_in_this_file = 0;
my $has_side_comment = 0;
my $last_line_had_side_comment = 0;
my ( $Kfirst, $Klast );
my $CODE_type;
# Loop to set CODE_type
# Possible CODE_types
# 'VB' = Verbatim - line goes out verbatim (a quote)
# 'FS' = Format Skipping - line goes out verbatim
# 'BL' = Blank Line
# 'HSC' = Hanging Side Comment - fix this hanging side comment
# 'SBCX'= Static Block Comment Without Leading Space
# 'SBC' = Static Block Comment
# 'BC' = Block Comment - an ordinary full line comment
# 'IO' = Indent Only - line goes out unchanged except for indentation
# 'NIN' = No Internal Newlines - line does not get broken
# 'VER' = VERSION statement
# '' = ordinary line of code with no restrictions
#--------------------
# Loop over all lines
#--------------------
my $ix_line = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$ix_line++;
my $line_type = $line_of_tokens->{_line_type};
my $last_CODE_type = $CODE_type;
# Set default to be ordinary code
$CODE_type = EMPTY_STRING;
#-------------------------------------
# This is only for lines marked 'CODE'
#-------------------------------------
if ( $line_type ne 'CODE' ) {
next;
}
my $input_line = $line_of_tokens->{_line_text};
my $Klast_prev = $Klast;
( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
my $jmax = defined($Kfirst) ? $Klast - $Kfirst : -1;
my $is_block_comment;
if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
if ( $jmax == 0 ) { $is_block_comment = 1; }
else { $has_side_comment = 1 }
}
#-----------------------------------------------------------
# Write line verbatim if we are in a formatting skip section
#-----------------------------------------------------------
if ($In_format_skipping_section) {
# Note: extra space appended to comment simplifies pattern matching
if (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#>>>'
|| $rOpts_format_skipping_end )
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_end/
)
{
$In_format_skipping_section = 0;
my $input_line_no = $line_of_tokens->{_line_number};
write_logfile_entry(
"Line $input_line_no: Exiting format-skipping section\n");
}
elsif (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
|| $rOpts_format_skipping_begin )
&& $rOpts_format_skipping
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_begin/
)
{
# warn of duplicate starting comment lines, git #118
my $input_line_no = $line_of_tokens->{_line_number};
warning(
"Already in format-skipping section which started at line $In_format_skipping_section\n",
$input_line_no
);
}
else {
# not at a format skipping control line
}
$CODE_type = 'FS';
next;
}
#----------------------------
# Check for a continued quote
#----------------------------
if ( $line_of_tokens->{_starting_in_quote} ) {
# A line which is entirely a quote or pattern must go out
# verbatim. Note: the \n is contained in $input_line.
if ( $jmax <= 0 ) {
if ( $self->[_save_logfile_] && $input_line =~ /\t/ ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->note_embedded_tab($input_line_number);
}
$CODE_type = 'VB';
next;
}
}
#-------------------------------------------------
# See if we are entering a formatting skip section
#-------------------------------------------------
if (
$is_block_comment
# optional fast pre-check
&& ( substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 4 ) eq '#<<<'
|| $rOpts_format_skipping_begin )
&& $rOpts_format_skipping
&& ( $rLL->[$Kfirst]->[_TOKEN_] . SPACE ) =~
/$format_skipping_pattern_begin/
)
{
my $input_line_no = $line_of_tokens->{_line_number};
$In_format_skipping_section = $input_line_no;
write_logfile_entry(
"Line $input_line_no: Entering format-skipping section\n");
$CODE_type = 'FS';
next;
}
# ignore trailing blank tokens (they will get deleted later)
if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
$jmax--;
}
#-----------
# Blank line
#-----------
if ( $jmax < 0 ) {
$CODE_type = 'BL';
next;
}
#---------
# Comments
#---------
if ($is_block_comment) {
# see if this is a static block comment (starts with ## by default)
my $is_static_block_comment = 0;
my $no_leading_space = substr( $input_line, 0, 1 ) eq '#';
if (
# optional fast pre-check
(
substr( $rLL->[$Kfirst]->[_TOKEN_], 0, 2 ) eq '##'
|| $rOpts_static_block_comment_prefix
)
&& $rOpts_static_block_comments
&& $input_line =~ /$static_block_comment_pattern/
)
{
$is_static_block_comment = 1;
}
# Check for comments which are line directives
# Treat exactly as static block comments without leading space
# reference: perlsyn, near end, section Plain Old Comments (Not!)
# example: '# line 42 "new_filename.plx"'
if (
$no_leading_space
&& $input_line =~ m{^\# \s*
line \s+ (\d+) \s*
(?:\s("?)([^"]+)\2)? \s*
$}x
)
{
$is_static_block_comment = 1;
}
# look for hanging side comment ...
if (
$last_line_had_side_comment # this follows as side comment
&& !$no_leading_space # with some leading space, and
&& !$is_static_block_comment # this is not a static comment
)
{
# continuing an existing HSC chain?
if ( $last_CODE_type eq 'HSC' ) {
$has_side_comment = 1;
$CODE_type = 'HSC';
next;
}
# starting a new HSC chain?
if (
$rOpts->{'hanging-side-comments'} # user is allowing
# hanging side comments
# like this
&& ( defined($Klast_prev) && $Klast_prev > 1 )
# and the previous side comment was not static (issue c070)
&& !(
$rOpts->{'static-side-comments'}
&& $rLL->[$Klast_prev]->[_TOKEN_] =~
/$static_side_comment_pattern/
)
)
{
# and it is not a closing side comment (issue c070).
my $K_penult = $Klast_prev - 1;
$K_penult -= 1 if ( $rLL->[$K_penult]->[_TYPE_] eq 'b' );
my $follows_csc =
( $rLL->[$K_penult]->[_TOKEN_] eq '}'
&& $rLL->[$K_penult]->[_TYPE_] eq '}'
&& $rLL->[$Klast_prev]->[_TOKEN_] =~
/$closing_side_comment_prefix_pattern/ );
if ( !$follows_csc ) {
$has_side_comment = 1;
$CODE_type = 'HSC';
next;
}
}
}
if ($is_static_block_comment) {
$CODE_type = $no_leading_space ? 'SBCX' : 'SBC';
next;
}
elsif ( $last_line_had_side_comment
&& !$rOpts_maximum_consecutive_blank_lines
&& $rLL->[$Kfirst]->[_LEVEL_] > 0 )
{
# Emergency fix to keep a block comment from becoming a hanging
# side comment. This fix is for the case that blank lines
# cannot be inserted. There is related code in sub
# 'process_line_of_CODE'
$CODE_type = 'SBCX';
next;
}
else {
$CODE_type = 'BC';
next;
}
}
#-------------------------
# Other special code types
#-------------------------
if ($rOpts_indent_only) {
$CODE_type = 'IO';
next;
}
if ( !$rOpts_add_newlines ) {
$CODE_type = 'NIN';
next;
}
# Patch needed for MakeMaker. Do not break a statement
# in which $VERSION may be calculated. See MakeMaker.pm;
# this is based on the coding in it.
# The first line of a file that matches this will be eval'd:
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
# Examples:
# *VERSION = \'1.01';
# ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
# We will pass such a line straight through without breaking
# it unless -npvl is used.
# Patch for problem reported in RT #81866, where files
# had been flattened into a single line and couldn't be
# tidied without -npvl. There are two parts to this patch:
# First, it is not done for a really long line (80 tokens for now).
# Second, we will only allow up to one semicolon
# before the VERSION. We need to allow at least one semicolon
# for statements like this:
# require Exporter; our $VERSION = $Exporter::VERSION;
# where both statements must be on a single line for MakeMaker
if ( !$Saw_VERSION_in_this_file
&& $jmax < 80
&& $input_line =~
/^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
{
$Saw_VERSION_in_this_file = 1;
write_logfile_entry("passing VERSION line; -npvl deactivates\n");
# This code type has lower priority than others
$CODE_type = 'VER';
next;
}
}
continue {
$line_of_tokens->{_code_type} = $CODE_type;
$last_line_had_side_comment = $has_side_comment;
if ($has_side_comment) {
push @ix_side_comments, $ix_line;
$has_side_comment = 0;
}
}
return \@ix_side_comments;
} ## end sub set_CODE_type
sub block_seqno_of_paren_keyword {
my ( $self, $KK ) = @_;
# Find brace at '){' after keyword such as for, foreach, ...
# SEE ALSO: sub block_seqno_of_paren_seqno
# Given:
# $KK = index of a keyword followed by parens and block '... ( ) {'
# such as 'for', 'foreach', 'while', 'if', 'elsif' ..
# Return:
# $seqno of the opening block brace for this keyword, if any
# $K_end_iterator = index of the last token of an iterator, if any
# or
# nothing if not found
# 'for my $var (..) { ... }'
# ^ ^
# | |
# --$KK --$seqno of brace that we want
my $rLL = $self->[_rLL_];
# look ahead for an opening paren
my $K_paren = $self->[_rK_next_seqno_by_K_]->[$KK];
return unless defined($K_paren);
my $token_paren = $rLL->[$K_paren]->[_TOKEN_];
return unless ( $token_paren eq '(' );
# found a paren, but does it belong to this keyword?
my $seqno_paren = $rLL->[$K_paren]->[_TYPE_SEQUENCE_];
# see if this opening paren immediately follows the keyword
my $K_n = $self->K_next_code($KK);
return unless $K_n;
# is it the next token? this is the common case
my $K_end_iterator;
my $saw_my;
my $token_KK = $rLL->[$KK]->[_TOKEN_];
if ( $K_n != $K_paren ) {
# look for 'for $var (', 'for my $var (', 'for my (', 'for $var ('
if ( $is_for_foreach{$token_KK} ) {
my $type_K_n = $rLL->[$K_n]->[_TYPE_];
my $token_K_n = $rLL->[$K_n]->[_TOKEN_];
# skip past a 'my'
if ( $type_K_n eq 'k' ) {
if ( $is_my_state_our{$token_K_n} ) {
$K_n = $self->K_next_code($K_n);
$saw_my = 1;
}
else { return }
}
# skip an identifier
if ( $K_n && $K_n != $K_paren && $rLL->[$K_n]->[_TYPE_] eq 'i' ) {
$K_n = $self->K_next_code($K_n);
# force this iterator to be entered as new lexical
$K_end_iterator = $K_paren;
}
}
}
# we must be at the paren
return unless ( $K_n && $K_n == $K_paren );
# now jump to the closing paren
$K_paren = $self->[_K_closing_container_]->{$seqno_paren};
# then look for the opening brace immediately after it
my $K_brace = $self->K_next_code($K_paren);
return unless ($K_brace);
# check for experimental 'for list': for my ( $a, $b) (@list) {
# ^
if ( $rLL->[$K_brace]->[_TOKEN_] eq '('
&& !$K_end_iterator
&& $is_for_foreach{$token_KK} )
{
if ( !$saw_my ) { $K_end_iterator = $K_brace }
my $seqno_test = $rLL->[$K_brace]->[_TYPE_SEQUENCE_];
my $K_test = $self->[_K_closing_container_]->{$seqno_test};
return unless $K_test;
$K_brace = $self->K_next_code($K_test);
return unless ($K_brace);
}
return unless ( $rLL->[$K_brace]->[_TOKEN_] eq '{' );
my $seqno_brace = $rLL->[$K_brace]->[_TYPE_SEQUENCE_];
return unless ($seqno_brace);
my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno_brace};
# Verify that this is the correct brace
if ( $block_type ne $token_KK ) {
# If not, this is unexpected and should be investigated
# (the block type may have been mis-marked)
my $lno = $rLL->[$KK]->[_LINE_INDEX_] + 1;
DEVEL_MODE && Fault(<<EOM);
at line $lno: found block type $block_type: expecting $token_KK - please check
EOM
return;
}
return ( $seqno_brace, $K_end_iterator );
} ## end sub block_seqno_of_paren_keyword
sub has_complete_package {
my ($self) = @_;
# return true if this file appears to contain at least one complete package
my $Klast = $self->K_last_code();
return unless defined($Klast);
my $rLL = $self->[_rLL_];
my $rK_package_list = $self->[_rK_package_list_];
return unless ( defined($rK_package_list) && @{$rK_package_list} );
# look for a file like this:
# package A::B
# ...
# 1;
my $KK = $rK_package_list->[0];
my $item = $rLL->[$KK];
my $type = $item->[_TYPE_];
# Stored K values may be off by 1 due to an added blank
if ( $type eq 'b' ) {
$KK += 1;
$item = $rLL->[$KK];
$type = $item->[_TYPE_];
}
# safety check - shouldn't happen
if ( $type ne 'P' ) {
DEVEL_MODE && Fault("Expecting type 'P' but found '$type'");
return;
}
my $level = $item->[_LEVEL_];
return unless ( $level == 0 );
# Look for '1;' at next package, if any, and at end of file
my @K_semicolon_test = ($Klast);
if ( @{$rK_package_list} > 1 ) {
my $K_package = $rK_package_list->[1];
my $Ktest = $self->K_previous_code($K_package);
push @K_semicolon_test, $Ktest;
}
foreach my $Ktest (@K_semicolon_test) {
if ( $rLL->[$Ktest]->[_TYPE_] eq 'b' ) { $Ktest -= 1 }
if ( $Ktest > $KK && $Ktest && $rLL->[$Ktest]->[_TYPE_] eq ';' ) {
my $K1 = $self->K_previous_code($Ktest);
if ( $K1 && $rLL->[$K1]->[_TOKEN_] eq '1' ) {
return 1;
}
}
}
return;
} ## end sub has_complete_package
sub is_complete_script {
my ( $self, $rline_type_count, $rkeyword_count ) = @_;
# Guess if we are formatting a complete script
# Given:
# $rline_type_count = hash ref of count of line types
# $rkeyword_count = hash ref of count of keywords
# Return: true or false
# Goal: help decide if we should skip certain warning checks when
# operating on just part of a script (such as from an editor).
#----------------------------------------------------------------
# TEST 1: Assume a file with known extension is a complete script
#----------------------------------------------------------------
my %is_standard_file_extension = (
'pm' => 1,
'pl' => 1,
'plx' => 1,
't' => 1,
);
my $input_stream_name = get_input_stream_name();
# look for a file extension
my $pos_dot = rindex( $input_stream_name, '.' );
my $file_extension = EMPTY_STRING;
if ( $pos_dot > 0 ) {
$file_extension = substr( $input_stream_name, $pos_dot + 1 );
# allow additional digits, like .pm.0, .pm.1 etc
if ( defined($file_extension)
&& length($file_extension)
&& $file_extension =~ /^\d+$/ )
{
my $str = substr( $input_stream_name, 0, $pos_dot );
$pos_dot = rindex( $str, '.' );
if ( $pos_dot > 0 ) {
$file_extension = substr( $str, $pos_dot + 1 );
}
}
return 1 if $is_standard_file_extension{ lc($file_extension) };
}
#-------------------------------------------------------------
# TEST 2: a positive starting level implies an incomplete script
#-------------------------------------------------------------
my $rLL = $self->[_rLL_];
return unless ( @{$rLL} );
my $sil = $rLL->[0]->[_LEVEL_];
return if ($sil);
#------------------------------------
# TEST 3: look for a complete package
#------------------------------------
return 1 if $self->has_complete_package();
#----------------------------
# TEST 4: examine other clues
#----------------------------
my $rlines = $self->[_rlines_];
my $line_count = @{$rlines};
return unless ($line_count);
my $input_line = $rlines->[0]->{_line_text};
my $saw_hash_bang = substr( $input_line, 0, 2 ) eq '#!'
&& $input_line =~ /^\#\!.*perl\b/;
my $rK_package_list = $self->[_rK_package_list_];
my $saw_package = defined($rK_package_list) && @{$rK_package_list};
my $sub_count = +keys %{ $self->[_ris_sub_block_] };
my $use_count = 0;
$use_count += $rkeyword_count->{use} if $rkeyword_count->{use};
$use_count += $rkeyword_count->{require} if $rkeyword_count->{require};
# Make a guess using the available clues. No single clue is conclusive.
my $score = 0;
# starting indicators
$score += 50
if ( $saw_hash_bang
|| $self->[_saw_use_strict_]
|| $saw_package );
$score +=
$use_count > 1 ? 50
: $use_count > 0 ? 25
: 0;
# interior indicators
$score +=
$line_count > 50 ? 50
: $line_count > 25 ? 25
: 0;
$score +=
$sub_count > 1 ? 50
: $sub_count > 0 ? 25
: 0;
# common filter keywords
foreach (qw( exit print printf open system exec die )) {
if ( $rkeyword_count->{$_} ) { $score += 50; last; }
}
$score += 50 if $rline_type_count->{POD};
# ending indicator
$score += 50 if $self->[_saw_END_or_DATA_];
if ( $score >= 100 ) { return 1 }
return;
} ## end sub is_complete_script
use constant DEBUG_USE_CONSTANT => 0;
sub get_qw_list {
my ( $self, $Kn ) = @_;
# Given:
# $Kn = index of start of a qw quote
# Return:
# ($K_last_q, \@list) to list of words, or
# nothing if error
my $rLL = $self->[_rLL_];
return unless defined($Kn);
my $type_n = $rLL->[$Kn]->[_TYPE_];
return unless ( $type_n eq 'q' );
my $token_n = $rLL->[$Kn]->[_TOKEN_];
my $K_last_q = $Kn;
# collect a multi-line qw
my $string = $token_n;
foreach my $Knn ( $Kn + 1 .. @{$rLL} - 1 ) {
my $type_nn = $rLL->[$Knn]->[_TYPE_];
next if ( $type_nn eq 'b' );
last if ( $type_nn ne 'q' );
$string .= SPACE . $rLL->[$Knn]->[_TOKEN_];
$K_last_q = $Knn;
}
$string = substr( $string, 2 ); # remove qw
$string =~ s/^\s*//; # trim left
$string = substr( $string, 1 ); # remove opening mark char
$string = substr( $string, 0, -1 ); # remove closing mark char
$string =~ s/^\s*//; # trim left
$string =~ s/\s*$//; # trim right
my @list = split /\s+/, $string;
return ( $K_last_q, \@list );
} ## end sub get_qw_list
sub expand_quoted_word_list {
my ( $self, $Kbeg ) = @_;
# Expand a list quoted words
# Given:
# $Kbeg = index of the start of a list of quoted words
# Returns:
# ref to list if found words
# undef if not successful, or non-constant list item encountered
my $rLL = $self->[_rLL_];
return unless defined($Kbeg);
my $Klimit = @{$rLL} - 1;
my @list;
my $Kn = $Kbeg - 1;
my $is_qwaf_Q;
while ( ++$Kn <= $Klimit ) {
my $type = $rLL->[$Kn]->[_TYPE_];
my $token = $rLL->[$Kn]->[_TOKEN_];
next if ( $type eq 'b' );
next if ( $type eq '#' );
next if ( $type eq ',' );
last if ( $type eq ';' );
last if ( $token eq '}' );
next if ( $token eq '(' );
if ( $token eq ')' ) { $is_qwaf_Q = 0; next }
if ( $type eq 'q' ) {
# qw list
my ( $K_last_q, $rlist ) = $self->get_qw_list($Kn);
return if ( !defined($K_last_q) );
if ( $K_last_q > $Kn ) { $Kn = $K_last_q }
push @list, @{$rlist};
}
elsif ( $type eq 'Q' ) {
my $name;
if ($is_qwaf_Q) {
$name = $token;
}
elsif ( length($token) > 2 ) {
my $ch0 = substr( $token, 0, 1 );
if ( $ch0 eq '"' || $ch0 eq "'" ) {
$name = substr( $token, 1, -1 );
}
else {
my $rQ_info = Q_spy($token);
if ( defined($rQ_info) && $rQ_info->{is_simple} ) {
my $nch = $rQ_info->{nch};
$name = substr( $token, $nch, -1 );
}
}
}
else {
## empty string
}
if ( defined($name) ) { push @list, $name }
}
elsif ( $type eq 'U' ) {
if ( $token eq 'qw' ) {
$Kn = $self->K_next_nonblank($Kn);
return if ( !defined($Kn) || $rLL->[$Kn]->[_TOKEN_] ne '(' );
my $seqno = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
return if ( !defined($seqno) );
return if ( !$self->[_ris_qwaf_by_seqno_]->{$seqno} );
$is_qwaf_Q = $seqno;
}
}
else {
# Give up on anything else..
# some examples where we have to quit:
# @EXPORT = Archive::Tar::Constant->_list_consts( __PACKAGE__ );
# @EXPORT = ( @CONSTANTS, qw( %ALL_CODESETS));
# @EXPORT = ( @{$EXPORT_TAGS{standard}}, ..
return;
}
} ## end while ( ++$Kn <= $Klimit )
return \@list;
} ## end sub expand_quoted_word_list
sub expand_EXPORT_list {
my ( $self, $KK, $rhash ) = @_;
# Given:
# $KK = index of variable @EXPORT or @EXPORT_OK
# $rhash = a hash to fill
# Task:
# Update $rhash with any quoted words which follow any subsequent '='
my $rLL = $self->[_rLL_];
my $Kn = $self->K_next_code($KK);
# Require a following '='
return unless ( $Kn && $rLL->[$Kn]->[_TYPE_] eq '=' );
# Move to the next token
$Kn = $self->K_next_code($Kn);
return unless ($Kn);
# Get any list
my $rlist = $self->expand_quoted_word_list($Kn);
return unless ($rlist);
# Include the listed words in the hash
foreach ( @{$rlist} ) { $rhash->{$_} = 1 }
return;
} ## end sub expand_EXPORT_list
sub scan_variable_usage {
my ( $self, ($roption) ) = @_;
# Scan for unused and reused lexical variables in a single sweep.
# Given:
# $roption = an optional set of types of checks,
# all checks are made if not given
# Return:
# - nothing if no errors found
# - ref to a list of 'warnings', one per variable, in line order.
# Each list item is a hash of values describing the issue. These
# are stored in a list of hash refs, as follows:
# push @warnings,
# {
# name => $name, # name, such as '$var', '%data'
# line_number => $line_number, # line number where defined
# K => $KK, # index of token $name
# keyword => $keyword, # 'my', 'state', 'for', 'foreach'
# letter => $letter, # one of: r s p u
# note => $note, # additional text info
# see_line => $see_line, # line referenced in note
# };
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_sub_block = $self->[_ris_sub_block_];
my $K_closing_container = $self->[_K_closing_container_];
# check for file without code (could be all pod or comments)
return unless defined( $self->K_first_code() );
# issues are indicated by these names:
my %unusual_variable_issue_note = (
c => "unused constant",
p => "package crossing",
r => "reused",
s => "multi-sigil",
u => "unused lexical",
);
# Default is to do all checks if no control hash received (dump mode)
if ( !defined($roption) ) {
foreach my $key ( keys %unusual_variable_issue_note ) {
$roption->{$key} = 1;
}
}
my $issue_type_string = "Issue types are";
foreach my $letter ( reverse sort keys %unusual_variable_issue_note ) {
next if ( !$roption->{$letter} );
my $txt = $unusual_variable_issue_note{$letter};
$issue_type_string .= " '$letter'=$txt";
}
# Unpack the control hash
my $check_sigil = $roption->{'s'};
my $check_cross_package = $roption->{'p'};
my $check_unused = $roption->{'u'};
my $check_reused = $roption->{'r'};
my $check_constant = $roption->{'c'};
my %is_valid_sigil = ( '$' => 1, '@' => 1, '%' => 1 );
# Variables defining current state:
my $current_package = 'main';
# The basic idea of this routine is straightforward:
# - We create a stack of block braces
# - We walk through the tokens in the file
# - At an opening block brace, we push a new stack entry
# - At a closing block brace, we pop the stack,
# and check the count of any 'my' vars (issue 'u')
# - At an identifier, like '$var':
# - if it follows a 'my' we enter it on the stack with starting count 0
# check conflicts with any other vars on the stack (issues 'r' and 's')
# - otherwise, we see if the variable is in the stack, and if so,
# update the count
# - At a package, we see if it has access to existing 'my' vars (issue 'p')
# There are lots of details, but that's the main idea. A difficulty is
# when 'my' vars are created in the control section of blocks such as
# for, foreach, if, unless, .. these follow special rules. The
# way it is done here is to propagate such vars in a special control
# layer stack entry which is pushed on just before these blocks.
my $rblock_stack = [];
my $rconstant_hash = {};
my $ruse_vars_hash = {};
my $rEXPORT_hash = {};
#---------------------------------------
# sub to push a block brace on the stack
#---------------------------------------
my $push_block_stack = sub {
my ( $seqno, $rvars ) = @_;
# push an entry for a new block onto the block stack:
# Given:
# $seqno = the sequence number of the code block
# $rvars = hash of initial identifiers for the block, if given
# will be empty hash ref if not given
if ( !defined($rvars) ) { $rvars = {} }
push @{$rblock_stack},
{ seqno => $seqno, package => $current_package, rvars => $rvars };
return;
}; ## end $push_block_stack = sub
$push_block_stack->(SEQ_ROOT);
# $rhash holds all lexecal variables defined within a given block:
# $rhash->{
# $name => {
# count => $count,
# line_index => $line_index,
# keyword => $keyword,
# package => $package,
# K => $KK
# }
# };
# $name = the variable name, such as '$data', '@list', '%vars',
# $count = number of uses
# $line_index = index of the line where it is defined
# $keyword = 'my' or 'state' or 'for' or 'foreach'
# $package = what package was in effect when it was defined
# $KK = token index (for sorting)
# Variables for a batch of lexical vars being collected:
my $my_keyword; # 'state' or 'my' keyword for this set
my $K_end_my = -1; # max token index of this set
my $in_signature_seqno = 0; # true while scanning a signature
my $my_starting_count = 0; # the initial token count for this set
# Variables for warning messages:
my @warnings; # array of warning messages
my %package_warnings; # warning messages for package cross-over
my %sub_count_by_package; # how many subs defined in a package
# Variables for scanning interpolated quotes:
my $ix_HERE_END = -1; # the line index of the last here target read
my $in_interpolated_quote; # in multiline quote with interpolation?
#-------------------------------------------------------
# sub to check for overlapping usage, issues 'r' and 's'
#-------------------------------------------------------
my $check_for_overlapping_variables = sub {
my ( $name, $KK ) = @_;
# Given:
# $name = a variable with sigil, such as '$var', '%var', '@var';
# $KK = index associated with this variable
# $line_index = index of line where this name first appears
# Task:
# Create a warning if this overlaps a previously defined variable
# Returns:
# true if error, variable is not of expected form with sigil
# false if no error
my $sigil = EMPTY_STRING;
my $word = EMPTY_STRING;
if ( $name =~ /^(\W+)(\w+)$/ ) {
$sigil = $1;
$word = $2;
}
else {
# give up, flag as error
# could be something like '$' or '@' in a signature, or
# for $Storable::downgrade_restricted (0, 1, ...
return 1;
}
# Perform checks for reused names
my @sigils_to_test;
if ($check_sigil) {
if ($check_reused) {
@sigils_to_test = (qw( $ @ % ));
}
else {
foreach my $sig (qw( $ @ % )) {
if ( $sig ne $sigil ) { push @sigils_to_test, $sig; }
}
}
}
elsif ($check_reused) {
push @sigils_to_test, $sigil;
}
else {
# neither
}
# See if this name has been seen, possibly with a different sigil
if (@sigils_to_test) {
# Look at stack and 'use vars' hash
foreach
my $item ( @{$rblock_stack}, $ruse_vars_hash->{$current_package} )
{
# distinguish between stack item and use vars item
my $rhash = defined( $item->{seqno} ) ? $item->{rvars} : $item;
foreach my $sig (@sigils_to_test) {
my $test_name = $sig . $word;
next unless ( $rhash->{$test_name} );
my $first_line = $rhash->{$test_name}->{line_index} + 1;
my $letter;
my $note;
my $see_line = 0;
if ( $sig eq $sigil ) {
my $as_iterator =
defined($my_keyword)
&& ( $my_keyword eq 'for'
|| $my_keyword eq 'foreach' )
? ' as iterator'
: EMPTY_STRING;
$note = "reused$as_iterator - see line $first_line";
$letter = 'r';
}
else {
$see_line = $first_line;
$note =
"overlaps $test_name in scope - see line $see_line";
$letter = 's';
}
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
push @warnings,
{
name => $name,
keyword => $my_keyword,
note => $note,
see_line => $see_line,
line_number => $line_index + 1,
letter => $letter,
K => $KK,
};
last;
}
}
}
return;
}; ## end $check_for_overlapping_variables = sub
#--------------------------------
# sub to checkin a new identifier
#--------------------------------
my $checkin_new_lexical = sub {
my ($KK) = @_;
# Store the new identifier at index $KK
my $name = $rLL->[$KK]->[_TOKEN_];
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
# Special checks for signature variables
if ($in_signature_seqno) {
# must be in top signature layer
my $parent = $self->parent_seqno_by_K($KK);
return if ( $parent != $in_signature_seqno );
# must be preceded by a comma or opening paren
my $Kp = $self->K_previous_code($KK);
return if ( !$Kp );
my $token_p = $rLL->[$Kp]->[_TOKEN_];
my $type_p = $rLL->[$Kp]->[_TYPE_];
return if ( $type_p ne ',' && $token_p ne '(' );
}
my $bad_name = $check_for_overlapping_variables->( $name, $KK );
return if ($bad_name);
# Store this lexical variable
my $rhash = $rblock_stack->[-1]->{rvars};
$rhash->{$name} = {
count => $my_starting_count,
line_index => $line_index,
keyword => $my_keyword,
package => $current_package,
K => $KK,
};
return;
}; ## end $checkin_new_lexical = sub
#--------------------------------------------------
# sub to update counts for a list of variable names
#--------------------------------------------------
my $update_use_count = sub {
my ( $sigil_string, $word, $bracket ) = @_;
# Given:
# $sigil_string = a string of leading sigils, like '$', '$$', '@$$'
# $word = the following bareword
# $bracket = a following array or hash bracket or brace, if any
# (token types '[' and 'L')
# Note: any braces around the bareword must have been stripped
# by the caller
# Task:
# Form the hash key ($word, @word, or %word) and update the count
return unless ($check_unused);
return unless ( defined($sigil_string) && defined($word) );
my $sigil = substr( $sigil_string, -1, 1 );
return unless ( $is_valid_sigil{$sigil} );
# Examples:
# input => key
# $var $var
# @var @var
# $var[ @var
# $var{ %var
# @$var $var
# ${var} $var (caller must remove the braces)
# @$var[0..2] $var
# @var[0..2] @var array slice
# @var{w1 w2} %var hash slice
# %var{w1 w2} %var hash slice
my $name;
if ( $bracket && length($sigil_string) == 1 ) {
if ( $bracket eq '{' ) { $sigil = '%' }
elsif ( $bracket eq '[' ) { $sigil = '@' }
else { }
}
$name = $sigil . $word;
foreach my $layer ( reverse( @{$rblock_stack} ) ) {
my $rvars = $layer->{rvars};
if ( $rvars->{$name} ) {
$rvars->{$name}->{count}++;
last;
}
}
return;
}; ## end $update_use_count = sub
my $checkin_new_constant = sub {
my ( $KK, $word ) = @_;
return if ( !defined($word) );
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
my $rvars = {
count => 0,
line_index => $line_index,
package => $current_package,
K => $KK,
};
$rconstant_hash->{$current_package}->{$word} = $rvars;
return;
}; ## end $checkin_new_constant = sub
my $push_new_EXPORT = sub {
my ( $KK, $package ) = @_;
# Save index of any @EXPORT and @EXPORT_OK lists
$package = $current_package unless ($package);
push @{ $rEXPORT_hash->{$package} }, $KK;
return;
}; ## end $push_new_EXPORT = sub
my $scan_use_vars = sub {
my ($KK) = @_;
my $Kn = $self->K_next_code($KK);
return unless ($Kn);
my $rlist = $self->expand_quoted_word_list($Kn);
return unless ($rlist);
my $line_index = $rLL->[$KK]->[_LINE_INDEX_];
$my_keyword = 'use vars';
foreach my $name ( @{$rlist} ) {
my $bad_name = $check_for_overlapping_variables->( $name, $KK );
next if ($bad_name);
my $rvars = {
line_index => $line_index,
package => $current_package,
K => $KK,
};
$ruse_vars_hash->{$current_package}->{$name} = $rvars;
}
return;
}; ## end $scan_use_vars = sub
my $scan_use_constant = sub {
my ($KK) = @_;
my $Kn = $self->K_next_code($KK);
return unless ($Kn);
my $type_n = $rLL->[$Kn]->[_TYPE_];
my $token_n = $rLL->[$Kn]->[_TOKEN_];
# step past a version
if ( $type_n eq 'n' || $type_n eq 'v' ) {
$Kn = $self->K_next_code($Kn);
$type_n = $rLL->[$Kn]->[_TYPE_];
$token_n = $rLL->[$Kn]->[_TOKEN_];
}
# patch for qw as function (qwaf)
my $is_qwaf_Q;
if ( $type_n eq 'U' && $token_n eq 'qw' ) {
$Kn = $self->K_next_code($Kn);
$type_n = $rLL->[$Kn]->[_TYPE_];
$token_n = $rLL->[$Kn]->[_TOKEN_];
$is_qwaf_Q = 1;
}
if ( $token_n eq '(' ) {
$Kn = $self->K_next_code($Kn);
$type_n = $rLL->[$Kn]->[_TYPE_];
$token_n = $rLL->[$Kn]->[_TOKEN_];
}
# use constant _meth1_=>1;
if ( $type_n eq 'w' ) {
$checkin_new_constant->( $Kn, $token_n );
}
# use constant '_meth1_',1 or other quote type
elsif ( $type_n eq 'Q' ) {
# This Q token is assumed to be a single token
my $name;
if ($is_qwaf_Q) {
$name = $token_n;
}
elsif ( length($token_n) > 2 ) {
my $ch0 = substr( $token_n, 0, 1 );
if ( $ch0 eq '"' || $ch0 eq "'" ) {
$name = substr( $token_n, 1, -1 );
}
else {
my $rQ_info = Q_spy($token_n);
if ( defined($rQ_info) && $rQ_info->{is_simple} ) {
my $nch = $rQ_info->{nch};
$name = substr( $token_n, $nch, -1 );
}
}
}
else {
## empty string
}
$checkin_new_constant->( $Kn, $name ) if ( defined($name) );
}
# use constant qw(_meth2_ 2);
elsif ( $type_n eq 'q' ) {
my $name;
if ( $token_n =~ /qw\s*.(\w+)/ ) {
$name = $1;
$checkin_new_constant->( $Kn, $name );
}
}
# A hash ref with multiple definitions:
# use constant { _meth3_=>3, _meth4_=>4};
# use constant { '_meth3_',3, '_meth4_',4};
elsif ( $type_n eq '{' && $token_n eq '{' ) {
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
return unless $seqno_n;
my $Kc = $self->[_K_closing_container_]->{$seqno_n};
return unless $Kc;
# loop to collect constants in hash ref
my $Knn = $self->K_next_code($Kn);
my $total_comma_count = 0;
my $last_type = ',';
my $level_start = $rLL->[$Knn]->[_LEVEL_];
foreach my $Kx ( $Knn .. $Kc - 1 ) {
my $type = $rLL->[$Kx]->[_TYPE_];
my $token = $rLL->[$Kx]->[_TOKEN_];
next if ( $type eq 'b' || $type eq '#' );
my $level = $rLL->[$Kx]->[_LEVEL_];
next if ( $level > $level_start );
if ( $level < $level_start ) {
## shouldn't happen
my $lno = $rLL->[$Kx]->[_LINE_INDEX_] + 1;
DEBUG_USE_CONSTANT
&& Fault("$lno: level=$level > start=$level_start\n");
return;
}
if ( $last_type eq ',' && !( $total_comma_count % 2 ) ) {
if ( $type eq 'w' ) {
$checkin_new_constant->( $Kx, $token );
}
elsif ( $type eq 'Q' ) {
if ( length($token) < 3 ) { return }
my $ch0 = substr( $token, 0, 1 );
my $name;
if ( $ch0 eq '"' || $ch0 eq "'" ) {
$name = substr( $token, 1, -1 );
}
else {
my $rQ_info = Q_spy($token);
if ( defined($rQ_info) && $rQ_info->{is_simple} ) {
my $nch = $rQ_info->{nch};
$name = substr( $token, $nch, -1 );
}
}
$checkin_new_constant->( $Kx, $name );
}
else {
my $lno = $rLL->[$Kx]->[_LINE_INDEX_] + 1;
DEBUG_USE_CONSTANT
&& Fault(
"$lno: unexpected type: type=$type token=$token\n");
return;
}
}
else {
if ( $type eq ',' || $type eq '=>' ) {
$total_comma_count++;
}
}
$last_type = $type;
}
}
elsif ( $type_n eq ';' ) {
}
else {
my $ln = $rLL->[$KK]->[_LINE_INDEX_] + 1;
DEBUG_USE_CONSTANT && Fault("$ln: unknown use constant syntax\n");
}
return;
}; ## end $scan_use_constant = sub
my $update_constant_count = sub {
my ( $KK, $word ) = @_;
if ( !defined($word) ) { $word = $rLL->[$KK]->[_TOKEN_] }
my $package = $current_package;
my $pos = rindex( $word, '::' );
if ( $pos >= 0 ) {
$package = $pos > 0 ? substr( $word, 0, $pos ) : 'main';
$word = substr( $word, $pos + 2 );
}
return if ( !defined( $rconstant_hash->{$package} ) );
my $rvars = $rconstant_hash->{$package}->{$word};
return if ( !defined($rvars) );
return if ( $KK <= $rvars->{K} );
$rvars->{count}++;
return;
}; ## end $update_constant_count = sub
#-----------------------------------------------
# sub to check for zero counts when stack closes
#-----------------------------------------------
my $check_for_unused_names = sub {
my ($rhash) = @_;
foreach my $name ( keys %{$rhash} ) {
my $entry = $rhash->{$name};
my $count = $entry->{count};
my $keyword = $entry->{keyword};
if ( !$count ) {
# Typically global vars are for external access so we
# do not report them as type 'u' (unused)
# NOTE: 'use vars' is not currently needed in the following
# test but is retained in case coding ever changes
next if ( $keyword eq 'our' || $keyword eq 'use vars' );
push @warnings,
{
name => $name,
keyword => $entry->{keyword},
note => EMPTY_STRING,
see_line => 0,
line_number => $entry->{line_index} + 1,
letter => 'u',
K => $entry->{K},
};
}
}
return;
}; ## end $check_for_unused_names = sub
#---------------------------------------
# sub to scan interpolated text for vars
#---------------------------------------
my $scan_quoted_text = sub {
my ($text) = @_;
return unless ($check_unused);
# Looking for something like $word, @word, $word[, $$word, ${word}, ..
while ( $text =~ / ([\$\@] [\$]*) \{?(\w+)\}? ([\[\{]?) /gcx ) {
## ------1------ -2- ---3---
my $sigil_string = $1;
my $word = $2;
my $brace = $3;
$update_use_count->( $sigil_string, $word, $brace );
} ## end while ( $text =~ ...)
return;
}; ## end $scan_quoted_text = sub
#-------------------------------------------------------------
# sub to find the next opening brace seqno of an if-elsif- chain
#-------------------------------------------------------------
my $push_next_if_chain = sub {
my ( $KK, $rpopped_vars ) = @_;
# Given:
# $KK = index of a closing block brace of if/unless/elsif chain
# $rpopped_vars = values just popped off the stack
# Task:
# - do nothing if chain ends, or
# - push $rpopped_vars onto the next block in the chain
# $seqno_block = sequence number of next opening block in the chain,
my $seqno_block;
my $K_n = $self->K_next_code($KK);
return unless ($K_n);
return unless ( $rLL->[$K_n]->[_TYPE_] eq 'k' );
# For an 'elsif' the brace will be after the closing paren
# 'elsif (..) { ... }'
# ^ ^
# | |
# --$KK --$seqno of brace that we want
#
if ( $rLL->[$K_n]->[_TOKEN_] eq 'elsif' ) {
( $seqno_block, my $K_last_iterator_uu ) =
$self->block_seqno_of_paren_keyword($K_n);
}
# For an 'else' the brace will be the next token
# 'else { ... }'
# ^ ^
# | |
# --$KK --$seqno of brace that we want
#
elsif ( $rLL->[$K_n]->[_TOKEN_] eq 'else' ) {
my $K_nn = $self->K_next_code($K_n);
if ( $K_nn
&& $is_opening_token{ $rLL->[$K_nn]->[_TOKEN_] } )
{
$seqno_block = $rLL->[$K_nn]->[_TYPE_SEQUENCE_];
}
}
else {
# chain ends if no elsif/else block
}
if ( $seqno_block
&& $rblock_type_of_seqno->{$seqno_block} )
{
$push_block_stack->( $seqno_block, $rpopped_vars );
}
return;
}; ## end $push_next_if_chain = sub
my $scan_braced_id = sub {
my ($KK) = @_;
# We are at an opening brace and looking for something like this:
# @{word}[@var]
# ${word}
# ^
# |
# -- $KK
return unless ($check_unused);
# Look back for the sigil
my $Kp = $self->K_previous_code($KK);
return unless ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 't' );
my $sigil_string = $rLL->[$Kp]->[_TOKEN_];
# Look forward for the bareword
my $Kn = $self->K_next_code($KK);
return unless ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'w' );
my $word = $rLL->[$Kn]->[_TOKEN_];
# Look forward for the closing brace
my $Knn = $self->K_next_code($Kn);
return unless ( defined($Knn) && $rLL->[$Knn]->[_TYPE_] eq 'R' );
# Look forward for a possible { or [
my $bracket;
my $Knnn = $self->K_next_code($Knn);
if ( defined($Knnn) ) {
my $next_type = $rLL->[$Knnn]->[_TYPE_];
if ( $next_type eq 'L' || $next_type eq '[' ) {
$bracket = $rLL->[$Knnn]->[_TOKEN_];
}
}
$update_use_count->( $sigil_string, $word, $bracket );
return;
}; ## end $scan_braced_id = sub
my $check_sub_signature = sub {
my ($KK) = @_;
# looking for a sub signature
# sub xxx (...) {
# -------
# | | | |
# $KK $Kn | |
# $K_opening_brace
# Note: this version cannot handle signatures within signatures.
# Inner signatures are currently ignored. For example, only the
# outermost $a below will be checked in this line:
# sub xyz ($a = sub ($a) { $a."z" }) { $a->("a")."y" }
# What happens is that variable $K_end_my is set by the first
# signature, and the second signature is within it and so does
# not get activated. A stack scheme would be necessary to handle
# this, but does not seem necessary because this probably only
# occurs in test code, and the only downside is that we limit
# some checking.
my $Kn = $self->K_next_code($KK);
return unless ( $rLL->[$Kn]->[_TOKEN_] eq '(' );
my $seqno_paren = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
return unless ($seqno_paren);
my $K_closing_paren = $self->[_K_closing_container_]->{$seqno_paren};
my $K_opening_brace = $self->K_next_code($K_closing_paren);
return unless ($K_opening_brace);
my $seqno_brace = $rLL->[$K_opening_brace]->[_TYPE_SEQUENCE_];
my $token_brace = $rLL->[$K_opening_brace]->[_TOKEN_];
return unless ( $seqno_brace && $token_brace eq '{' );
# Treat signature variables like my variables
# Create special block on the stack..see note above for
# $is_if_unless
if ( $K_opening_brace > $K_end_my ) {
$K_end_my = $K_opening_brace;
$my_keyword = 'sub signature';
$in_signature_seqno = $seqno_paren;
$push_block_stack->($seqno_brace);
}
return;
}; ## end $check_sub_signature = sub
my $rkeyword_count = {};
my $rline_type_count = {};
#--------------------
# Loop over all lines
#--------------------
my $ix_line = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$ix_line++;
my $line_type = $line_of_tokens->{_line_type};
if ( $line_type ne 'CODE' ) {
$rline_type_count->{$line_type}++;
next;
}
my ( $Kfirst, $Klast ) = @{ $line_of_tokens->{_rK_range} };
next unless defined($Kfirst);
#----------------------------------
# Loop over all tokens on this line
#----------------------------------
foreach my $KK ( $Kfirst .. $Klast ) {
my $type = $rLL->[$KK]->[_TYPE_];
next if ( $type eq 'b' || $type eq '#' );
my $token = $rLL->[$KK]->[_TOKEN_];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($seqno) {
my $block_type;
$block_type = $rblock_type_of_seqno->{$seqno} if ($seqno);
my $is_on_stack = ( $seqno == $rblock_stack->[-1]->{seqno} );
if ( $is_opening_token{$token} ) {
# always push a block
if ($block_type) {
# exit signature if we will push a duplicate block
if ( $in_signature_seqno
&& @{$rblock_stack}
&& $seqno == $rblock_stack->[-1]->{seqno} )
{
$in_signature_seqno = 0;
}
$push_block_stack->($seqno);
# update sub count for cross-package checks
if ( $ris_sub_block->{$seqno} ) {
$sub_count_by_package{$current_package}++;
}
}
# look for something like @{word} etc
if ( $type eq 'L' ) {
$scan_braced_id->($KK);
}
}
elsif ( $is_closing_token{$token} ) {
# always pop the stack if this token is on the stack
if ($is_on_stack) {
my $stack_item = pop @{$rblock_stack};
my $rpopped_vars = $stack_item->{rvars};
# if we popped a block token
if ($block_type) {
# the current package gets updated at a block end
$current_package = $stack_item->{package};
# Check for unused vars if requested
if ( $check_unused && $rpopped_vars ) {
$check_for_unused_names->($rpopped_vars);
}
# Check for and propagate an if-chain control layer,
# which will have the same seqno.
if ( @{$rblock_stack}
&& $seqno == $rblock_stack->[-1]->{seqno} )
{
# pop again
$stack_item = pop @{$rblock_stack};
$rpopped_vars = $stack_item->{rvars};
# Check unused vars
# - except for vars in an if-chain control layer
# because they are involved in logic
if ( $check_unused
&& $rpopped_vars
&& !$is_if_unless_elsif_else{$block_type} )
{
$check_for_unused_names->($rpopped_vars);
}
# propagate control layer along if chain
if ( $is_if_unless_elsif{$block_type} ) {
$push_next_if_chain->( $KK, $rpopped_vars );
}
}
}
# error if we just popped a non-block token:
else {
my $K_n = $self->K_next_code($KK);
my $token_n = $rLL->[$K_n]->[_TOKEN_];
my $lno = $ix_line + 1;
DEVEL_MODE && Fault(<<EOM);
Non-block closing token '$token' on stack followed by token $token_n at line $lno
Expecting to find an opening token here.
EOM
}
}
# if not on the stack: error if this is a block
elsif ($block_type) {
my $lno = $ix_line + 1;
my $stack_seqno = $rblock_stack->[-1]->{seqno};
DEVEL_MODE
&& Fault(
"stack error: seqno=$seqno ne $stack_seqno near line $lno\n"
);
# give up - file may be unbalanced
return;
}
else {
# not a block, not on stack: nothing to do
}
}
else {
# ternary
}
}
#----------
# a keyword
#----------
elsif ( $type eq 'k' ) {
#----------------------------------------------
# look for lexical keyword 'my', 'state', 'our'
#----------------------------------------------
if ( $is_my_state_our{$token} ) {
$my_keyword = $token;
# Set '$K_end_my' to be the last $K index of the variables
# controlled by this 'my' keyword
my $Kn = $self->K_next_code($KK);
$K_end_my = $Kn;
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '(' ) {
my $seqno_next = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
$K_end_my = $K_closing_container->{$seqno_next};
}
# Get initial count
$my_starting_count = 0;
my $K_last_code = $self->K_previous_code($KK);
if ( defined($K_last_code) ) {
my $last_type = $rLL->[$K_last_code]->[_TYPE_];
# A preceding \ implies that this memory can be used
# even if the variable name does not appear again.
# For example: return \my $string_buf;
if ( $last_type eq '\\' ) { $my_starting_count = 1 }
}
}
#--------------------------------------------------
# look for certain keywords which introduce blocks:
# such as 'for my $var (..) { ... }'
#--------------------------------------------------
elsif ( $is_if_unless_while_until_for_foreach{$token} ) {
my ( $seqno_brace, $K_end_iterator ) =
$self->block_seqno_of_paren_keyword($KK);
if ($seqno_brace) {
# Found the brace. Mark an iterator as a new lexical
# variable in order to catch something like:
# my $i;
# foreach $i(...) { }
# where the iterator $i is not the same as the first
# $i, We should be beyond any existing $K_end_my, but
# check anyway:
if ( $K_end_iterator && $K_end_iterator > $K_end_my ) {
$K_end_my = $K_end_iterator;
$my_keyword = $token;
}
# Variables created between these keywords and their
# opening brace have special scope rules. We will
# create a special 'control layer' stack entry for them
# here, with the same block sequence number. When the
# closing block brace arrives, it will look for a
# duplicate stack entry and either close it or,
# for if-elsif-else chain, propagate it onward.
$push_block_stack->($seqno_brace);
}
}
elsif ( $token eq 'sub' ) {
$check_sub_signature->($KK);
}
else {
$rkeyword_count->{$token}++;
}
}
#--------------
# an identifier
#--------------
elsif ( $type eq 'i' || $type eq 'Z' ) {
# Still collecting 'my' vars?
if ( $KK <= $K_end_my ) {
$checkin_new_lexical->($KK);
}
# Not collecting 'my' vars - update counts
elsif ( $check_unused || $check_constant ) {
my $sigil_string = EMPTY_STRING;
my $word = EMPTY_STRING;
# The regex below will match numbers, like '$34x', but that
# should not be a problem because it will not match a hash
# key.
if ( $token =~ /^(\W+)?(\w.*)$/ ) {
$sigil_string = $1 if ($1);
$word = $2;
if ( $check_constant && $word ) {
# look for constant invoked like '&ORD' or '->ORD'
if ( !$sigil_string || $sigil_string eq '&' ) {
$update_constant_count->( $KK, $word );
}
elsif ( $sigil_string eq '@'
&& index( $word, 'EXPORT' ) >= 0 )
{
# Looking for stuff like:
# @EXPORT_OK
# @ALPHA::BETA::EXPORT
my $package = $current_package;
my $name = $word;
my $pos = rindex( $word, '::' );
if ( $pos >= 0 ) {
$package = substr( $word, 0, $pos );
$name = substr( $word, $pos + 2 );
}
if ( $name eq 'EXPORT' || $name eq 'EXPORT_OK' )
{
$push_new_EXPORT->( $KK, $package );
}
}
else { }
}
if ($sigil_string) {
my $sigil = substr( $sigil_string, -1, 1 );
if ( !$is_valid_sigil{$sigil} ) {
$sigil_string = EMPTY_STRING;
$word = EMPTY_STRING;
}
}
}
if ( $check_unused
&& $sigil_string
&& $word
&& $word =~ /\w+/ )
{
my $Kn = $self->K_next_code($KK);
my $bracket;
if ( defined($Kn) ) {
my $next_type = $rLL->[$Kn]->[_TYPE_];
if ( $next_type eq '[' || $next_type eq 'L' ) {
$bracket = $rLL->[$Kn]->[_TOKEN_];
}
}
$update_use_count->( $sigil_string, $word, $bracket );
}
}
else {
# ignore variable if not collecting 'my' or counts
}
}
#----------------
# a sub statement
#----------------
elsif ( $type eq 'S' ) {
$check_sub_signature->($KK);
}
#--------------------
# a package statement
#--------------------
elsif ( $type eq 'P' ) {
my ( $keyword, $package ) = split /\s+/, $token, 2;
# keyword 'package' may be on a previous line
if ( !$package ) { $package = $keyword }
if ( $package ne $current_package ) {
$current_package = $package;
# Look for lexical vars declared in other packages which
# will be accessible in this package. We will limit
# this check to new package statements at the top level
# in order to filter out some common cases.
if ( $check_cross_package && @{$rblock_stack} == 1 ) {
my $rpackage_warnings = $package_warnings{$package};
if ( !defined($rpackage_warnings) ) {
$rpackage_warnings = [];
$package_warnings{$package} = $rpackage_warnings;
}
foreach my $item ( @{$rblock_stack} ) {
my $rhash = $item->{rvars};
foreach my $name ( keys %{$rhash} ) {
my $entry = $rhash->{$name};
my $pkg = $entry->{package};
if ( $pkg ne $package ) {
my $lno = $ix_line + 1;
my $see_line = $lno;
my $note =
"is accessible in later packages, see line $see_line";
push @{$rpackage_warnings},
{
name => $name,
keyword => $entry->{keyword},
note => $note,
see_line => $see_line,
line_number => $entry->{line_index} + 1,
letter => 'p',
K => $entry->{K},
};
}
}
}
}
}
}
#-----------
# a here doc
#-----------
elsif ( $type eq 'h' ) {
if ($check_unused) {
# collect the here doc text
my $ix_HERE = max( $ix_HERE_END, $ix_line );
( $ix_HERE_END, my $here_text ) =
$self->get_here_text($ix_HERE);
# scan here-doc if it is interpolated
if ( is_interpolated_here_doc($token) ) {
$scan_quoted_text->($here_text);
}
}
}
#---------------------
# a quote of some type
#---------------------
elsif ( $type eq 'Q' ) {
# is this an interpolated quote?
my $interpolated;
if ( $KK == $Kfirst && $line_of_tokens->{_starting_in_quote} ) {
$interpolated = $in_interpolated_quote;
}
else {
# is interpolated if it follow a match operator =~ or !~
my $K_last_code = $self->K_previous_code($KK);
if ( $K_last_code
&& $is_re_match_op{ $rLL->[$K_last_code]->[_TYPE_] } )
{
$interpolated = 1;
}
# is not interpolated for leading operators: qw q tr y '
elsif ( $token =~ /^(qw | q[^qrx] | tr | [y\'] )/x ) {
$interpolated = 0;
}
# is interpolated for everything else
else {
$interpolated = 1;
}
}
if ($interpolated) {
$scan_quoted_text->($token);
}
if ( $KK == $Klast && $line_of_tokens->{_ending_in_quote} ) {
$in_interpolated_quote = $interpolated;
}
else {
$in_interpolated_quote = 0;
}
}
elsif ( $type eq 'w' ) {
if ( $token eq 'vars' ) {
my $Kp = $self->K_previous_code($KK);
if ( defined($Kp)
&& $rLL->[$Kp]->[_TOKEN_] eq 'use'
&& $rLL->[$Kp]->[_TYPE_] eq 'k' )
{
$scan_use_vars->($KK);
}
}
if ($check_constant) {
if ( $token eq 'constant' ) {
my $Kp = $self->K_previous_code($KK);
if ( defined($Kp)
&& $rLL->[$Kp]->[_TOKEN_] eq 'use'
&& $rLL->[$Kp]->[_TYPE_] eq 'k' )
{
$scan_use_constant->($KK);
}
else {
$update_constant_count->($KK);
}
}
else {
$update_constant_count->($KK);
}
}
}
elsif ( $type eq 'C' ) {
if ($check_constant) {
$update_constant_count->($KK);
}
}
elsif ( $type eq 'U' ) {
if ($check_constant) {
$update_constant_count->($KK);
}
}
else {
# skip all other token types
}
}
}
#----------
# Finish up
#----------
# skip final 'c' and 'u' output if this appears to be a snippet
my $is_possible_snippet = $roption->{is_possible_snippet};
my $more_u_checks =
$check_unused
&& @{$rblock_stack} == 1
&& keys %{ $rblock_stack->[0]->{rvars} };
my $more_c_checks = $check_constant && keys %{$rconstant_hash};
if ( $is_possible_snippet
&& ( $more_u_checks || $more_c_checks ) )
{
# the flag $is_possible_snippet = 0:No 1:Uncertain 2:Yes
if ( $is_possible_snippet == 1
&& $self->is_complete_script( $rline_type_count, $rkeyword_count ) )
{
# not a snippet
}
# is possible snippet: deactivate 'c' and 'u
else {
$check_unused = 0;
$check_constant = 0;
}
}
if ( @{$rblock_stack} != 1 ) {
# shouldn't happen for a balanced input file
DEVEL_MODE && Fault("stack error at end of scan\n");
}
else {
if ($check_unused) {
foreach my $item ( @{$rblock_stack} ) {
my $rhash = $item->{rvars};
$check_for_unused_names->($rhash);
}
}
}
if ($check_constant) {
my @warnings_c;
my %packages_with_warnings;
foreach my $package ( keys %{$rconstant_hash} ) {
my $rhash = $rconstant_hash->{$package};
next if ( !defined($rhash) );
foreach my $name ( keys %{$rhash} ) {
my $entry = $rconstant_hash->{$package}->{$name};
next if ( $entry->{count} );
push @warnings_c,
{
name => $name,
keyword => 'use constant',
see_line => EMPTY_STRING,
note => "appears unused in package $package",
line_number => $entry->{line_index} + 1,
letter => 'c',
package => $package,
K => $entry->{K},
};
$packages_with_warnings{$package} = 1;
}
}
# filter out constants found in @EXPORT and @EXPORT_OK
if (@warnings_c) {
# expand relevant EXPORT lists
my $rEXPORT_words_by_package = {};
foreach my $package ( keys %packages_with_warnings ) {
my $rKlist = $rEXPORT_hash->{$package};
next unless ($rKlist);
$rEXPORT_words_by_package->{$package} = {};
foreach my $KK ( @{$rKlist} ) {
$self->expand_EXPORT_list( $KK,
$rEXPORT_words_by_package->{$package} );
}
}
# remove warnings in EXPORT lists
foreach my $rwarning (@warnings_c) {
my $package = $rwarning->{package};
my $name = $rwarning->{name};
my $rhash = $rEXPORT_words_by_package->{$package};
next if ( $rhash && $rhash->{$name} );
push @warnings, $rwarning;
}
}
}
# Merge package issues...
# Only include cross-package warnings for packages which created subs.
# This will limit this type of warning to significant package changes.
my @p_warnings;
foreach my $key ( keys %package_warnings ) {
next if ( !$sub_count_by_package{$key} );
push @p_warnings, @{ $package_warnings{$key} };
}
# Remove duplicate package warnings for the same initial line, which can
# happen if there were multiple packages.
if (@p_warnings) {
my %seen;
# sort on package warning line order
@p_warnings = sort { $a->{see_line} <=> $b->{see_line} } @p_warnings;
# use first package warning for a given variable
foreach my $item (@p_warnings) {
my $key = $item->{line_number} . ':' . $item->{name};
next if ( $seen{$key}++ );
push @warnings, $item;
}
}
if (@warnings) {
# filter out certain common 'our' variables from all warnings
# because they are common and difficult to fix, and
# sort on token index and issue type
my %is_exempted_global_name;
my @q = qw( $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS @ISA $AUTOLOAD );
@is_exempted_global_name{@q} = (1) x scalar(@q);
@warnings =
sort { $a->{K} <=> $b->{K} || $a->{letter} cmp $b->{letter} }
# NOTE: 'use vars' is not currently needed in the following test
# but is retained in case coding ever changes
grep {
( $_->{keyword} ne 'our' && $_->{keyword} ne 'use vars' )
|| !$is_exempted_global_name{ $_->{name} }
} @warnings;
}
return ( \@warnings, $issue_type_string );
} ## end sub scan_variable_usage
sub dump_unusual_variables {
my ($self) = @_;
# process a --dump-unusual-variables(-duv) command
my ( $rlines, $issue_type_string ) = $self->scan_variable_usage();
return unless ( $rlines && @{$rlines} );
my $input_stream_name = get_input_stream_name();
# output for multiple types
my $output_string = <<EOM;
$input_stream_name: output for --dump-unusual-variables
$issue_type_string
Line:Issue: Var: note
EOM
foreach my $item ( @{$rlines} ) {
my $lno = $item->{line_number};
my $letter = $item->{letter};
my $keyword = $item->{keyword};
my $name = $item->{name};
my $note = $item->{note};
if ($note) { $note = ": $note" }
$output_string .= "$lno:$letter: $keyword $name$note\n";
}
print {*STDOUT} $output_string;
return;
} ## end sub dump_unusual_variables
sub initialize_warn_hash {
my ( $long_name, $default, $rall_opts ) = @_;
# Given:
# $long_name = full option name
# $default = default value
# $rall_opts = all possible options
# Return the corresponding option hash
# Example of all possible options for --warn-variable-types=s
# r - reused scope
# s - reused sigil
# p - package boundaries crossed by lexical variables
# u - unused lexical variable defined by my, state, our
# c - unused constant defined by use constant
# Other warn options use different letters
# Other controls:
# 0 - none of the above
# 1 - all of the above
# * - all of the above
# Example:
# -wvt='s r' : do check types 's' and 'r'
my $rwarn_hash = {};
if ( !$rall_opts || !@{$rall_opts} ) {
Fault("all_options is empty for call with option $long_name\n");
return $rwarn_hash;
}
my $user_option_string = $rOpts->{$long_name};
if ( !defined($user_option_string) ) { $user_option_string = $default }
return $rwarn_hash unless ($user_option_string);
my %is_valid_option;
@is_valid_option{ @{$rall_opts} } = (1) x scalar( @{$rall_opts} );
# allow comma separators
$user_option_string =~ s/,/ /g;
my @opts = split_words($user_option_string);
return $rwarn_hash unless (@opts);
# check a single item
if ( @opts == 1 ) {
my $opt = $opts[0];
# Split a single option of bundled letters like 'rsp' into 'r s p'
# but give a warning because this may not be allowed in the future
if ( length($opt) > 1 ) {
@opts = split //, $opt;
Warn("Please use space-separated letters in --$long_name\n");
}
elsif ( $opt eq '*' || $opt eq '1' ) {
@opts = keys %is_valid_option;
}
elsif ( $opt eq '0' ) {
return $rwarn_hash;
}
else {
# should be one of the allowed letters - catch any error below
}
}
my $msg = EMPTY_STRING;
foreach my $opt (@opts) {
if ( $is_valid_option{$opt} ) {
$rwarn_hash->{$opt} = 1;
next;
}
# invalid option..
if ( $opt =~ /^[01\*]$/ ) {
$msg .=
"--$long_name cannot contain $opt mixed with other options\n";
}
else {
$msg .= "--$long_name has unexpected symbol: '$opt'\n";
}
}
if ($msg) { Die($msg) }
return $rwarn_hash;
} ## end sub initialize_warn_hash
sub make_excluded_name_hash {
my ($option_name) = @_;
# Convert a list of words into a hash ref for an input option
# Given:
# $option_name = the name of an input option
# example: 'warn-variable-exclusion-list'
my $rexcluded_name_hash = {};
my $excluded_names = $rOpts->{$option_name};
if ($excluded_names) {
$excluded_names =~ s/,/ /g;
my @xl = split_words($excluded_names);
my $err_msg = EMPTY_STRING;
foreach my $name (@xl) {
if ( $name =~ /^([\$\@\%\*])?(\w+)?(\*)?$/ ) {
my $left_star = $1;
my $key = $2;
my $right_star = $3;
if ( defined($left_star) ) {
if ( $left_star ne '*' ) {
if ( defined($key) ) {
# append sigil to the bareword
$key = $left_star . $key;
}
else {
# word not given: '$*' is ok but just '$' is not
if ($right_star) { $key = $left_star }
}
$left_star = EMPTY_STRING;
}
}
# Wildcard matching codes:
# 1 = no stars
# 2 = left star only
# 3 = right star only
# 4 = both left and right stars
my $code = 1;
$code += 1 if ($left_star);
$code += 2 if ($right_star);
if ( !defined($key) ) {
$err_msg .= "--$option_name has unexpected name: '$name'\n";
}
else {
$rexcluded_name_hash->{$key} = $code;
}
}
else {
$err_msg .= "--$option_name has unexpected name: '$name'\n";
}
}
if ($err_msg) { Die($err_msg) }
}
return $rexcluded_name_hash;
} ## end sub make_excluded_name_hash
sub wildcard_match {
my ( $name, $rwildcard_match_list ) = @_;
# Given:
# $name = a string to test for a match
# $rwildcard_match_list = a list of [key,code] pairs:
# key = a string to match
# code = 2, 3, or 4 is match type (see comments below)
# Return:
# true for a match
# false for no match
# For example, key='$pack' with code=3 is short for '$pack*'
# which will match '$package', '$packer', etc
# Loop over all possible matches
foreach ( @{$rwildcard_match_list} ) {
my ( $key, $code ) = @{$_};
my $len_key = length($key);
my $len_name = length($name);
next if ( $len_name < $len_key );
# code 2 = left star only
if ( $code == 2 ) {
if ( substr( $name, -$len_key, $len_key ) eq $key ) { return 1 }
}
# code 3 = right star only
elsif ( $code == 3 ) {
if ( substr( $name, 0, $len_key ) eq $key ) { return 1 }
}
# code 4 = both left and right stars
elsif ( $code == 4 ) {
if ( index( $name, $key, 0 ) >= 0 ) { return 1 }
}
else {
DEVEL_MODE && Fault("unexpected code '$code' for '$name'\n");
}
}
return;
} ## end sub wildcard_match
sub initialize_warn_variable_types {
my ( $wvt_in_args, $num_files, $line_range_clipped ) = @_;
# Initialization for:
# --warn-variable-types=s and
# --warn-variable-exclusion-list=s
# Given:
# $wvt_in_args = true if the -wvt parameter was on the command line
# $num_files = number of files on the command line
# $line_range_clipped = true if only part of a file is being formatted
my @all_opts = qw( r s p u c );
$rwarn_variable_types =
initialize_warn_hash( 'warn-variable-types', 0, \@all_opts );
# Check for issues 'u' or 'c' cannot be fully made if we are working
# on a partial file (snippet), so we save info about that.
if ( $rwarn_variable_types->{u} || $rwarn_variable_types->{c} ) {
# Three value switch: 0=NO, 1=MAYBE 2=DEFINITELY
my $is_possible_snippet = 1;
# assume snippet if incomplete line range is being formatted
if ($line_range_clipped) {
$is_possible_snippet = 2;
}
# assume complete script if operating on multiple files or if
# operating on one file and -wvt came in on the command line
if ( $is_possible_snippet == 1 && $num_files ) {
if ( $num_files > 1 || $wvt_in_args && $num_files ) {
$is_possible_snippet = 0;
}
}
$rwarn_variable_types->{is_possible_snippet} = $is_possible_snippet;
}
$ris_warn_variable_excluded_name =
make_excluded_name_hash('warn-variable-exclusion-list');
return;
} ## end sub initialize_warn_variable_types
sub filter_excluded_names {
my ( $rwarnings, $rexcluded_name_hash ) = @_;
# Remove warnings for variable names excluded by user request
# for an operation like --warn-variable-types
# Given:
# $rwarnigns = ref to list of warning info hashes
# $rexcluded_name_hash = ref to hash with excluded names
# Return updated $rwarnings with excluded names removed
if ( @{$rwarnings} && $rexcluded_name_hash ) {
# Check for exact matches
$rwarnings =
[ grep { !$rexcluded_name_hash->{ $_->{name} } } @{$rwarnings} ];
# See if there are any wildcard names
my @excluded_wildcards;
foreach my $key ( keys %{$rexcluded_name_hash} ) {
my $code = $rexcluded_name_hash->{$key};
if ( $code != 1 ) {
push @excluded_wildcards, [ $key, $code ];
}
}
if (@excluded_wildcards) {
my @tmp;
foreach my $item ( @{$rwarnings} ) {
my $name = $item->{name};
if ( wildcard_match( $name, \@excluded_wildcards ) ) {
next;
}
push @tmp, $item;
}
$rwarnings = \@tmp;
}
}
return $rwarnings;
} ## end sub filter_excluded_names
sub warn_variable_types {
my ($self) = @_;
# process a --warn-variable-types command
my $wv_key = 'warn-variable-types';
my $wv_option = $rOpts->{$wv_key};
return unless ( %{$rwarn_variable_types} );
my ( $rwarnings, $issue_type_string ) =
$self->scan_variable_usage($rwarn_variable_types);
return unless ( $rwarnings && @{$rwarnings} );
$rwarnings =
filter_excluded_names( $rwarnings, $ris_warn_variable_excluded_name );
# loop to form error messages
my $message_middle = EMPTY_STRING;
foreach my $item ( @{$rwarnings} ) {
my $name = $item->{name};
my $lno = $item->{line_number};
my $letter = $item->{letter};
my $keyword = $item->{keyword};
my $note = $item->{note};
if ($note) { $note = ": $note" }
$message_middle .= "$lno:$letter: $keyword $name$note\n";
}
if ($message_middle) {
my $message = "Begin scan for --$wv_key=$wv_option\n";
$message .= <<EOM;
$issue_type_string
Line:Issue: Var: note
EOM
$message .= $message_middle;
$message .= "End scan for --$wv_key=$wv_option:\n";
warning($message);
}
return;
} ## end sub warn_variable_types
sub block_seqno_of_paren_seqno {
my ( $self, $seqno_paren ) = @_;
# Find brace at '){' after paren of keyword such as for, foreach, ...
# SEE ALSO: sub block_seqno_of_paren_keyword
# Given:
# $seqno_paren = sequence number of the paren following a keyword which
# may either introduce a block or be a trailing statement modifier,
# such as 'if',
# Return:
# - the sequence number of the block, if any, or
# - nothing
# if (...) { ...
# ^ ^ ^
# | | |
# | | K_opening_brace => return sequno of this brace
# | K_closing_paren
# $seqno_paren = seqno of this paren pair
return unless $seqno_paren;
my $K_closing_paren = $self->[_K_closing_container_]->{$seqno_paren};
return unless ($K_closing_paren);
my $K_opening_brace = $self->K_next_code($K_closing_paren);
return unless ($K_opening_brace);
my $rLL = $self->[_rLL_];
my $seqno_block = $rLL->[$K_opening_brace]->[_TYPE_SEQUENCE_];
return
unless ( $seqno_block
&& $rLL->[$K_opening_brace]->[_TOKEN_] eq '{'
&& $self->[_rblock_type_of_seqno_]->{$seqno_block} );
return $seqno_block;
} ## end sub block_seqno_of_paren_seqno
sub dump_mixed_call_parens {
my ($self) = @_;
# Implent --dump-mixed-call-parens
my $opt_name = 'dump-mixed-call-parens';
return unless $rOpts->{$opt_name};
my $rLL = $self->[_rLL_];
my %skip_keywords;
my @q = qw( my our local state
and cmp continue do else elsif eq ge gt le lt ne not or xor );
@skip_keywords{@q} = (1) x scalar(@q);
my %call_counts;
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
# Types which will be checked:
# 'k'=builtin keyword, 'U'=user defined sub, 'w'=unknown bareword
next unless ( $is_kwU{ $rLL->[$KK]->[_TYPE_] } );
my $type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $type eq 'k' && $skip_keywords{$token} ) { next }
my $Kn = $self->K_next_code($KK);
next unless defined($Kn);
my $token_Kn = $rLL->[$Kn]->[_TOKEN_];
my $have_paren;
if ( $token_Kn eq '=>' ) { next }
elsif ( $token_Kn eq '->' ) { next }
elsif ( $token_Kn eq '(' ) { $have_paren = 1 }
else { $have_paren = 0 }
# return if this is the block form of 'if', 'unless', ..
if ( $have_paren
&& $is_if_unless_while_until_for_foreach{$token} )
{
my $seqno = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
next if ( $self->block_seqno_of_paren_seqno($seqno) );
}
if ( !defined( $call_counts{$token} ) ) {
$call_counts{$token} = [ 0, 0, $type ];
}
$call_counts{$token}->[$have_paren]++;
}
my @mixed_counts;
foreach my $key ( keys %call_counts ) {
my ( $no_count, $yes_count, $type ) = @{ $call_counts{$key} };
next unless ( $no_count && $yes_count );
push @mixed_counts,
{
name => $key,
type => $type,
no_count => $no_count,
yes_count => $yes_count,
};
}
return unless (@mixed_counts);
# sort on lc of type so that user sub type 'U' will come after 'k'
my @sorted =
sort { lc $a->{type} cmp lc $b->{type} || $a->{name} cmp $b->{name} }
@mixed_counts;
my $input_stream_name = get_input_stream_name();
my $output_string = <<EOM;
$input_stream_name: output for --dump-mixed-call-parens
use -wcp=s and/or nwcp=s to find line numbers, where s is a string of words
types are 'k'=builtin keyword 'U'=user sub 'w'=other word
type:word:+count:-count
EOM
foreach my $item (@sorted) {
my $type = $item->{type};
my $name = $item->{name};
my $no_count = $item->{no_count};
my $yes_count = $item->{yes_count};
$output_string .= "$type:$name:$yes_count:$no_count\n";
}
print {*STDOUT} $output_string;
return;
} ## end sub dump_mixed_call_parens
sub initialize_call_paren_style {
# parse --want-call-parens=s and --nowant-call-parens=s
# and store results in this global hash:
%call_paren_style = ();
my $iter = -1;
foreach my $opt_name ( 'nowant-call-parens', 'want-call-parens' ) {
$iter++;
my $opt = $rOpts->{$opt_name};
next unless defined($opt);
# allow comma separators
$opt =~ s/,/ /g;
if ( my @q = split_words($opt) ) {
foreach my $word (@q) {
# words must be simple identifiers, or '&'
if ( $word !~ /^(?:\&|\w+)$/ || $word =~ /^\d/ ) {
Die("Unexpected word in --$opt_name: '$word'\n");
}
if ( $iter && defined( $call_paren_style{$word} ) ) {
Warn("'$word' occurs in both -nwcp and -wcp, using -wcp\n");
}
}
@call_paren_style{@q} = ($iter) x scalar(@q);
}
}
return;
} ## end sub initialize_call_paren_style
sub scan_call_parens {
my ($self) = @_;
# Perform a scan requested by --want-call-parens
# We search for selected functions or keywords and for a following paren.
# A warning is issued if the paren existence is not what is wanted
# according to the setting --want-call-parens.
# This routine does not attempt to add or remove parens, it merely
# issues a warning so that the user can make a change if desired.
# It is risky to add or delete parens automatically; see git #128.
return unless (%call_paren_style);
my $opt_name = 'want-call-parens';
my $rwarnings = [];
#---------------------
# Loop over all tokens
#---------------------
my $rLL = $self->[_rLL_];
foreach my $KK ( 0 .. @{$rLL} - 1 ) {
# Types which will be checked:
# 'k'=builtin keyword, 'U'=user defined sub, 'w'=unknown bareword
next unless ( $is_kwU{ $rLL->[$KK]->[_TYPE_] } );
# Are we looking for this word?
my $type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
my $want_paren = $call_paren_style{$token};
# Only user-defined subs (type 'U') have defaults.
if ( !defined($want_paren) ) {
$want_paren =
$type eq 'k' ? undef
: $type eq 'U' ? $call_paren_style{'&'}
: undef;
}
next unless defined($want_paren);
# This is a selected word. Look for a '(' at the next token.
my $Kn = $self->K_next_code($KK);
next unless defined($Kn);
my $token_Kn = $rLL->[$Kn]->[_TOKEN_];
if ( $token_Kn eq '=>' ) { next }
elsif ( $token_Kn eq '->' ) { next }
elsif ( $token_Kn eq '(' ) { next if ($want_paren) }
else { next if ( !$want_paren ) }
# return if this is the block form of 'if', 'unless', ..
if ( $token_Kn eq '('
&& $is_if_unless_while_until_for_foreach{$token} )
{
my $seqno = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
next if ( $self->block_seqno_of_paren_seqno($seqno) );
}
# This disagrees with the wanted style; issue a warning.
my $note = $want_paren ? "no call parens" : "has call parens";
my $rwarning = {
token => $token,
token_next => $token_Kn,
note => $note,
line_number => $rLL->[$KK]->[_LINE_INDEX_] + 1,
## want => $want_paren,
## KK => $KK,
## Kn => $Kn,
};
push @{$rwarnings}, $rwarning;
}
# Report any warnings
if ( @{$rwarnings} ) {
my $message = "Begin scan for --$opt_name\n";
$message .= <<EOM;
Line:text:
EOM
foreach my $item ( @{$rwarnings} ) {
my $token = $item->{token};
my $token_next = $item->{token_next};
my $note = $item->{note};
my $lno = $item->{line_number};
# trim long tokens for the output line
if ( length($token_next) > 23 ) {
$token_next = substr( $token_next, 0, 20 ) . '...';
}
# stop before a ':' to allow use of ':' as spreadsheet col separator
my $ii = index( $token_next, ':' );
if ( $ii >= 0 ) { $token_next = substr( $token_next, 0, $ii ) }
$message .= "$lno:$token $token_next: $note\n";
}
$message .= "End scan for --$opt_name\n";
# Note that this is sent in a single call to warning() in order
# to avoid triggering a stop on large warning count
warning($message);
}
return;
} ## end sub scan_call_parens
sub find_non_indenting_braces {
my ( $self, $rix_side_comments ) = @_;
# Find and mark all non-indenting braces in this file.
# Given:
# $rix_side_comments = index of lines which have side comments
# Find and save the line indexes of these special side comments in:
# $self->[_rseqno_non_indenting_brace_by_ix_];
# Non-indenting braces are opening braces of the form
# { #<<< ...
# which do not cause an increase in indentation level.
# They are enabled with the --non-indenting-braces, or -nib, flag.
return unless ( $rOpts->{'non-indenting-braces'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
foreach my $ix ( @{$rix_side_comments} ) {
my $line_of_tokens = $rlines->[$ix];
my $line_type = $line_of_tokens->{_line_type};
if ( $line_type ne 'CODE' ) {
# shouldn't happen
DEVEL_MODE && Fault("unexpected line_type=$line_type\n");
next;
}
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
# shouldn't happen
DEVEL_MODE && Fault("did not get a comment\n");
next;
}
next if ( $Klast <= $Kfirst ); # maybe HSC
my $token_sc = $rLL->[$Klast]->[_TOKEN_];
my $K_m = $Klast - 1;
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) {
$K_m--;
$type_m = $rLL->[$K_m]->[_TYPE_];
}
my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
if ($seqno_m) {
my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
# The pattern ends in \s but we have removed the newline, so
# we added it back for the match. That way we require an exact
# match to the special string and also allow additional text.
$token_sc .= "\n";
if ( $block_type_m
&& $is_opening_type{$type_m}
&& $token_sc =~ /$non_indenting_brace_pattern/ )
{
$rseqno_non_indenting_brace_by_ix->{$ix} = $seqno_m;
}
}
}
return;
} ## end sub find_non_indenting_braces
sub interbracket_arrow_check {
my ($self) = @_;
# Implement the options to add or delete optional arrows between brackets
my $rOpts_add = $rOpts->{'add-interbracket-arrows'};
my $rOpts_del = $rOpts->{'delete-interbracket-arrows'};
my $rOpts_warn = $rOpts->{'warn-interbracket-arrows'};
my $rOpts_warn_and_style = $rOpts_warn && %interbracket_arrow_style;
return
unless ( $rOpts_add || $rOpts_del || $rOpts_warn_and_style );
# Method:
# Loop over all opening brackets and look back for a possible arrow
# and closing bracket. If the location between brackets allows an
# optional arrow, then see if one should be added or deleted.
# Set a flag for sub respace_tokens which will make the change.
# Deleting examples:
# $variables->{'a'}->{'b'} $variables->{'a'}{'b'}
# $variables{'a'}->{'b'} $variables{'a'}->{'b'}
# $items[1]->[4]->{red} $items[1][4]{red}
# $items{blue}->[4]->{red} $items{blue}[4]{red}
# Adding examples:
# $variables->{'a'}{'b'} $variables->{'a'}->{'b'}
# $variables{'a'}->{'b'} $variables{'a'}->{'b'}
# $items[1][4]{red} $items[1]->[4]->{red}
# $items{blue}[4]{red} $items{blue}->[4]->{red}
# bracket chain ] { } [ ] [
# | | |
# arrow ok? ? ? ?
# The following chain rule is used to locate optional arrow locations:
# Scanning left to right:
# -arrows can begin once we see an opening token preceded by:
# - an ->, or
# - a simple scalar identifier like '$href{' or '$aryref['
# - Once arrows begin they may continue to the end of the bracket chain.
# To illustrate why we just can't add and remove arrows between
# ']' and '[', for example, consider
# my $v1 = [ 1, 2, [ 3, 4 ] ]->[2]->[0]; # ok
# my $v2 = [ 1, 2, [ 3, 4 ] ]->[2][0]; # ok, keep required arrow
# my $v3 = [ 1, 2, [ 3, 4 ] ][2][0]; # Error
# Note that an arrow does not get placed between '}' and '[' here:
# my $val = ${$x}[1];
# Perltidy marks the '$' as type 't', and since the logic below checks
# for identifiers of type 'i', it will work ok.
# We will maintain the flag for this check in the following hash:
my %trailing_arrow_ok_by_seqno;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $K_opening_container = $self->[_K_opening_container_];
my @lno_del;
my @lno_add;
my $warn = sub {
# write a warning on changes made or needed if -wia is set
my ( $rlno_list, $first_word ) = @_;
my $str;
my $num_changes = @{$rlno_list};
my @unique_lno = do {
my %seen;
grep { !$seen{$_}++ } @{$rlno_list};
};
my $num_lno = @unique_lno;
my $num_lim = 10;
if ( $num_lno <= $num_lim ) {
$str = join( SPACE, @unique_lno );
}
else {
$str = join( SPACE, @unique_lno[ 0 .. $num_lim - 1 ] ) . " ...";
}
my $ess1 = $num_changes == 1 ? EMPTY_STRING : 's';
my $ess2 = $num_lno == 1 ? EMPTY_STRING : 's';
my $msg = "$first_word $num_changes '->'$ess1 at line$ess2 $str\n";
warning($msg);
return;
}; ## end $warn = sub
# Complexity control flag:
# =0 left container must just contain a single token
# =1 left container must not contain other containers [DEFAULT]
# =2 no complexity constraints
my $complexity = $rOpts->{'interbracket-arrow-complexity'};
if ( !defined($complexity) ) { $complexity = 1 }
#--------------------------------------------
# Main loop over all opening container tokens
#--------------------------------------------
foreach my $seqno ( sort { $a <=> $b } keys %{$K_opening_container} ) {
# We just want opening token types 'L" or '['
# Note: the tokenizer marks hash braces '{' and '}' as 'L' and 'R'
# but we have to be careful because small block braces can also
# get marked 'L' and 'R' for formatting purposes.
my $Ko = $K_opening_container->{$seqno};
my $type = $rLL->[$Ko]->[_TYPE_];
next if ( $type ne 'L' && $type ne '[' );
# Now find the previous nonblank token
my $K_m = $Ko - 1;
next if ( $K_m < 0 );
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > 0 ) {
$K_m -= 1;
$type_m = $rLL->[$K_m]->[_TYPE_];
}
# These vars will hold the previous closing bracket, if any;
# initialized to this token but will be moved if it is an arrow
my $K_mm = $K_m;
my $type_mm = $type_m;
# Decide if an inter-bracket arrow could follow the closing token
# of this container..
# preceded by scalar identifier (such as '$array[' or '$hash{') ?
if ( $type_m eq 'i' || $type_m eq 'Z' ) {
my $token_m = $rLL->[$K_m]->[_TOKEN_];
if ( substr( $token_m, 0, 1 ) eq '$' ) {
# arrows can follow the CLOSING bracket of this container
$trailing_arrow_ok_by_seqno{$seqno} = 1;
}
}
# or a closing bracket or hash brace
elsif ( $type_m eq ']' || $type_m eq 'R' ) {
my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
# propagate the arrow status flag
$trailing_arrow_ok_by_seqno{$seqno} =
$trailing_arrow_ok_by_seqno{$seqno_m};
}
# check a pointer and if found, back up one more token
elsif ( $type_m eq '->' ) {
# arrows can follow the CLOSING bracket of this container
$trailing_arrow_ok_by_seqno{$seqno} = 1;
# back up one token before the arrow
$K_mm = $K_m - 1;
next if ( $K_mm <= 0 );
$type_mm = $rLL->[$K_mm]->[_TYPE_];
if ( $type_mm eq 'b' && $K_mm > 0 ) {
$K_mm -= 1;
$type_mm = $rLL->[$K_mm]->[_TYPE_];
}
}
else {
# something else
}
# now check for a preceding closing bracket or hash brace
next if ( $type_mm ne ']' && $type_mm ne 'R' );
my $seqno_mm = $rLL->[$K_mm]->[_TYPE_SEQUENCE_];
next if ( !$seqno_mm );
$trailing_arrow_ok_by_seqno{$seqno} = 1;
# We are between brackets with these two or three sequential tokens,
# indexes _mm and _m are identical if there is no arrow.
# $type_mm $type_m $type
# R or ] ->? [ or L
# Can an inter-bracket arrow be here?
next unless ( $trailing_arrow_ok_by_seqno{$seqno_mm} );
# If the user defined a style, only continue if this requires
# adding or deleting an '->' to match the style
if (%interbracket_arrow_style) {
my $style = $interbracket_arrow_style{ $type_mm . $type };
next if ( !$style );
next
if ( $style == -1 && $type_m ne '->'
|| $style == 1 && $type_m eq '->' );
}
next if ( $type_m eq '->' && !$rOpts_del && !$rOpts_warn );
next if ( $type_m ne '->' && !$rOpts_add && !$rOpts_warn );
# Do not continue if the left container is too complex..
# complexity flag = 0: only one nonblank token in the brackets
if ( !$complexity ) {
my $count = 0;
my $Ko_mm = $K_opening_container->{$seqno_mm};
next unless defined($Ko_mm);
foreach my $KK ( $Ko_mm + 1 .. $K_mm - 2 ) {
next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
$count++;
last if ( $count > 1 );
}
next if ( $count > 1 );
}
# complexity flag = 1: no interior container tokens
elsif ( $complexity == 1 ) {
if ( $seqno_mm ne $seqno - 1 ) {
next;
}
}
else {
# complexity flag >1 => no restriction
}
# set a flag telling sub respace_tokens to actually make the change
my $lno = 1 + $rLL->[$Ko]->[_LINE_INDEX_];
if ( $type_m eq '->' ) {
if ($rOpts_del) {
$self->[_rwant_arrow_before_seqno_]->{$seqno} = -1;
}
if ( $rOpts_del || $rOpts_warn_and_style ) { push @lno_del, $lno }
}
else {
if ($rOpts_add) {
$self->[_rwant_arrow_before_seqno_]->{$seqno} = 1;
}
if ( $rOpts_add || $rOpts_warn_and_style ) { push @lno_add, $lno }
}
}
if ($rOpts_warn) {
my $wia = '--warn-interbracket-arrows report:';
$warn->( \@lno_add, $rOpts_add ? "$wia added" : "$wia: missing" )
if (@lno_add);
$warn->( \@lno_del, $rOpts_del ? "$wia deleted " : "$wia: unwanted " )
if (@lno_del);
}
return;
} ## end sub interbracket_arrow_check
sub delete_side_comments {
my ( $self, $rix_side_comments ) = @_;
# Handle any requested side comment deletions.
# Given:
# $rix_side_comments = ref to list of indexes of lines with side comments
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
foreach my $ix ( @{$rix_side_comments} ) {
my $line_of_tokens = $rlines->[$ix];
my $line_type = $line_of_tokens->{_line_type};
# This fault shouldn't happen because we only saved CODE lines with
# side comments in the TASK 1 loop above.
if ( $line_type ne 'CODE' ) {
if (DEVEL_MODE) {
my $lno = $ix + 1;
Fault(<<EOM);
Hit unexpected line_type = '$line_type' near line $lno while deleting side comments, should be 'CODE'
EOM
}
next;
}
my $CODE_type = $line_of_tokens->{_code_type};
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
if ( !defined($Kfirst) || $rLL->[$Klast]->[_TYPE_] ne '#' ) {
if (DEVEL_MODE) {
my $lno = $ix + 1;
Fault(<<EOM);
Did not find side comment near line $lno while deleting side comments
EOM
}
next;
}
my $delete_side_comment =
$rOpts_delete_side_comments
&& ( $Klast > $Kfirst || $CODE_type eq 'HSC' )
&& (!$CODE_type
|| $CODE_type eq 'HSC'
|| $CODE_type eq 'IO'
|| $CODE_type eq 'NIN' );
# Do not delete special control side comments
if ( $rseqno_non_indenting_brace_by_ix->{$ix} ) {
$delete_side_comment = 0;
}
if (
$rOpts_delete_closing_side_comments
&& !$delete_side_comment
&& $Klast > $Kfirst
&& ( !$CODE_type
|| $CODE_type eq 'HSC'
|| $CODE_type eq 'IO'
|| $CODE_type eq 'NIN' )
)
{
my $token = $rLL->[$Klast]->[_TOKEN_];
my $K_m = $Klast - 1;
my $type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
my $seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
# patch to delete asub csc's (c380)
if ( !$seqno_m && $K_m && $rLL->[$K_m]->[_TYPE_] eq ';' ) {
$K_m = $K_m - 1;
$type_m = $rLL->[$K_m]->[_TYPE_];
if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
if ( $K_m == $Kfirst ) {
$seqno_m = $rLL->[$K_m]->[_TYPE_SEQUENCE_];
}
}
if ($seqno_m) {
my $block_type_m = $rblock_type_of_seqno->{$seqno_m};
if ( $block_type_m
&& $token =~ /$closing_side_comment_prefix_pattern/
&& $block_type_m =~ /$closing_side_comment_list_pattern/
&& $block_type_m !~
/$closing_side_comment_exclusion_pattern/ )
{
$delete_side_comment = 1;
}
}
} ## end if ( $rOpts_delete_closing_side_comments...)
if ($delete_side_comment) {
# We are actually just changing the side comment to a blank.
# This may produce multiple blanks in a row, but sub respace_tokens
# will check for this and fix it.
$rLL->[$Klast]->[_TYPE_] = 'b';
$rLL->[$Klast]->[_TOKEN_] = SPACE;
# The -io option outputs the line text, so we have to update
# the line text so that the comment does not reappear.
if ( $CODE_type eq 'IO' ) {
my $line = EMPTY_STRING;
foreach my $KK ( $Kfirst .. $Klast - 1 ) {
$line .= $rLL->[$KK]->[_TOKEN_];
}
$line =~ s/\s+$//;
$line_of_tokens->{_line_text} = $line . "\n";
}
# If we delete a hanging side comment the line becomes blank.
if ( $CODE_type eq 'HSC' ) { $line_of_tokens->{_code_type} = 'BL' }
}
}
return;
} ## end sub delete_side_comments
my %wU;
my %wiq;
my %is_wit;
my %is_nonlist_keyword;
my %is_nonlist_type;
my %is_unexpected_equals;
my %is_ascii_type;
BEGIN {
# added 'U' to fix cases b1125 b1126 b1127
my @q = qw( w U );
@wU{@q} = (1) x scalar(@q);
@q = qw( w i q Q G C Z );
@wiq{@q} = (1) x scalar(@q);
@q = qw( w i t ); # for c250: added new types 'P', 'S', formerly 'i'
@is_wit{@q} = (1) x scalar(@q);
# Parens following these keywords will not be marked as lists. Note that
# 'for' is not included and is handled separately, by including 'f' in the
# hash %is_counted_type, since it may or may not be a c-style for loop.
@q = qw( if elsif unless and or );
@is_nonlist_keyword{@q} = (1) x scalar(@q);
# Parens following these types will not be marked as lists
@q = qw( && || );
@is_nonlist_type{@q} = (1) x scalar(@q);
@q = qw( = == != );
@is_unexpected_equals{@q} = (1) x scalar(@q);
# We can always skip expensive length_function->() calls for these
# ascii token types
@q = qw#
b k L R ; ( { [ ? : ] } ) f t n v F p m pp mm
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x=
... **= <<= >>= &&= ||= //= <=>
+ - / * | % ! x ~ = \ ? : . < > ^ &
#;
push @q, ',';
@is_ascii_type{@q} = (1) x scalar(@q);
} ## end BEGIN
{ #<<< begin closure respace_tokens
my $rLL_new; # This will be the new array of tokens
# These are variables in $self
my $rLL;
my $length_function;
my $K_closing_ternary;
my $K_opening_ternary;
my $rchildren_of_seqno;
my $rhas_broken_code_block;
my $rhas_broken_list;
my $rhas_broken_list_with_lec;
my $rhas_code_block;
my $rhas_list;
my $rhas_ternary;
my $ris_assigned_structure;
my $ris_broken_container;
my $ris_excluded_lp_container;
my $ris_list_by_seqno;
my $ris_permanently_broken;
my $rlec_count_by_seqno;
my $roverride_cab3;
my $rparent_of_seqno;
my $rtype_count_by_seqno;
my $rblock_type_of_seqno;
my $rwant_arrow_before_seqno;
my $ris_sub_block;
my $ris_asub_block;
my $rseqno_arrow_call_chain_start;
my $rarrow_call_chain;
my $K_opening_container;
my $K_closing_container;
my @K_sequenced_token_list;
my @seqno_paren_arrow;
my %K_first_here_doc_by_seqno;
my $last_nonblank_code_type;
my $last_nonblank_code_token;
my $last_nonblank_block_type;
my $last_last_nonblank_code_type;
my $last_last_nonblank_code_token;
my $K_last_S;
my $K_last_S_is_my;
my %seqno_stack;
my %K_old_opening_by_seqno;
my $depth_next;
my $depth_next_max;
my @sub_seqno_stack;
my $current_sub_seqno;
my $cumulative_length;
# Variables holding the current line info
my $Ktoken_vars;
my $Kfirst_old;
my $Klast_old;
my $Klast_old_code;
my $CODE_type;
my $rwhitespace_flags;
# new index K of package or class statements
my $rK_package_list;
# new index K of @_ tokens
my $rK_AT_underscore_by_sub_seqno;
# new index K of first $self tokens for each sub
my $rK_first_self_by_sub_seqno;
# new index K of first 'bless' for each sub
my $rK_bless_by_sub_seqno;
# new index K of 'return' for each sub
my $rK_return_by_sub_seqno;
# new index K of 'wantarray' for each sub
my $rK_wantarray_by_sub_seqno;
# info about list of sub call args
my $rsub_call_paren_info_by_seqno;
my $rDOLLAR_underscore_by_sub_seqno;
# index K of the preceding 'S' token for a sub
my $rK_sub_by_seqno;
# true for a 'my' sub
my $ris_my_sub_by_seqno;
sub initialize_respace_tokens_closure {
my ($self) = @_;
$rLL_new = []; # This is the new array
$rLL = $self->[_rLL_];
$length_function = $self->[_length_function_];
$K_closing_ternary = $self->[_K_closing_ternary_];
$K_opening_ternary = $self->[_K_opening_ternary_];
$rchildren_of_seqno = $self->[_rchildren_of_seqno_];
$rhas_broken_code_block = $self->[_rhas_broken_code_block_];
$rhas_broken_list = $self->[_rhas_broken_list_];
$rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
$rhas_code_block = $self->[_rhas_code_block_];
$rhas_list = $self->[_rhas_list_];
$rhas_ternary = $self->[_rhas_ternary_];
$ris_assigned_structure = $self->[_ris_assigned_structure_];
$ris_broken_container = $self->[_ris_broken_container_];
$ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
$ris_list_by_seqno = $self->[_ris_list_by_seqno_];
$ris_permanently_broken = $self->[_ris_permanently_broken_];
$rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
$roverride_cab3 = $self->[_roverride_cab3_];
$rparent_of_seqno = $self->[_rparent_of_seqno_];
$rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
$rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
$rwant_arrow_before_seqno = $self->[_rwant_arrow_before_seqno_];
$ris_sub_block = $self->[_ris_sub_block_];
$ris_asub_block = $self->[_ris_asub_block_];
$rK_package_list = $self->[_rK_package_list_];
$rK_AT_underscore_by_sub_seqno = $self->[_rK_AT_underscore_by_sub_seqno_];
$rK_first_self_by_sub_seqno = $self->[_rK_first_self_by_sub_seqno_];
$rK_bless_by_sub_seqno = $self->[_rK_bless_by_sub_seqno_];
$rK_return_by_sub_seqno = $self->[_rK_return_by_sub_seqno_];
$rK_wantarray_by_sub_seqno = $self->[_rK_wantarray_by_sub_seqno_];
$rsub_call_paren_info_by_seqno = $self->[_rsub_call_paren_info_by_seqno_];
$rseqno_arrow_call_chain_start = $self->[_rseqno_arrow_call_chain_start_];
$rarrow_call_chain = $self->[_rarrow_call_chain_];
$rDOLLAR_underscore_by_sub_seqno =
$self->[_rDOLLAR_underscore_by_sub_seqno_];
$rK_sub_by_seqno = $self->[_rK_sub_by_seqno_];
$ris_my_sub_by_seqno = $self->[_ris_my_sub_by_seqno_];
%K_first_here_doc_by_seqno = ();
$last_nonblank_code_type = ';';
$last_nonblank_code_token = ';';
$last_nonblank_block_type = EMPTY_STRING;
$last_last_nonblank_code_type = ';';
$last_last_nonblank_code_token = ';';
$K_last_S = 1;
$K_last_S_is_my = undef;
%seqno_stack = ();
%K_old_opening_by_seqno = (); # Note: old K index
$depth_next = 0;
$depth_next_max = 0;
@sub_seqno_stack = ();
$current_sub_seqno = 0;
# we will be setting token lengths as we go
$cumulative_length = 0;
$Ktoken_vars = undef; # the old K value of $rtoken_vars
$Kfirst_old = undef; # min K of old line
$Klast_old = undef; # max K of old line
$Klast_old_code = undef; # K of last token if side comment
$CODE_type = EMPTY_STRING;
# Set the whitespace flags, which indicate the token spacing preference.
$rwhitespace_flags = $self->set_whitespace_flags();
# Note that $K_opening_container and $K_closing_container have values
# defined in sub get_line() for the previous K indexes. They were needed
# in case option 'indent-only' was set, and we didn't get here. We no
# longer need those and will eliminate them now to avoid any possible
# mixing of old and new values. This must be done AFTER the call to
# set_whitespace_flags, which needs these.
$K_opening_container = $self->[_K_opening_container_] = {};
$K_closing_container = $self->[_K_closing_container_] = {};
@K_sequenced_token_list = ();
# array for saving seqno's of ')->' for possible line breaks, git #171
@seqno_paren_arrow = ();
return;
} ## end sub initialize_respace_tokens_closure
sub respace_tokens {
my $self = shift;
# This routine is called once per file to do as much formatting as possible
# before new line breaks are set.
# Returns:
# $severe_error = true if processing must terminate immediately
# $rqw_lines = ref to list of lines with qw quotes (for -qwaf)
my ( $severe_error, $rqw_lines );
# We do not change any spaces in --indent-only mode
if ( $rOpts->{'indent-only'} ) {
# We need to define lengths for -indent-only to avoid undefs, even
# though these values are not actually needed for option --indent-only.
$rLL = $self->[_rLL_];
$cumulative_length = 0;
foreach my $item ( @{$rLL} ) {
my $token = $item->[_TOKEN_];
my $token_length =
$length_function ? $length_function->($token) : length($token);
$cumulative_length += $token_length;
$item->[_TOKEN_LENGTH_] = $token_length;
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
}
return ( $severe_error, $rqw_lines );
}
# This routine makes all necessary and possible changes to the tokenization
# after the initial tokenization of the file. This is a tedious routine,
# but basically it consists of inserting and deleting whitespace between
# nonblank tokens according to the selected parameters. In a few cases
# non-space characters are added, deleted or modified.
# The goal of this routine is to create a new token array which only needs
# the definition of new line breaks and padding to complete formatting. In
# a few cases we have to cheat a little to achieve this goal. In
# particular, we may not know if a semicolon will be needed, because it
# depends on how the line breaks go. To handle this, we include the
# semicolon as a 'phantom' which can be displayed as normal or as an empty
# string.
# Method: The old tokens are copied one-by-one, with changes, from the old
# linear storage array $rLL to a new array $rLL_new.
# (re-)initialize closure variables for this problem
$self->initialize_respace_tokens_closure();
#--------------------------------
# Main over all lines of the file
#--------------------------------
my $rlines = $self->[_rlines_];
my $line_type = EMPTY_STRING;
my $last_K_out;
foreach my $line_of_tokens ( @{$rlines} ) {
my $input_line_number = $line_of_tokens->{_line_number};
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
$CODE_type = $line_of_tokens->{_code_type};
if ( $CODE_type eq 'BL' ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$self->[_rblank_and_comment_count_]->{$seqno} += 1;
if ( !$ris_permanently_broken->{$seqno}
&& $rOpts_maximum_consecutive_blank_lines )
{
$ris_permanently_broken->{$seqno} = 1;
$self->mark_parent_containers( $seqno,
$ris_permanently_broken );
}
}
}
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless defined($Kfirst);
( $Kfirst_old, $Klast_old ) = ( $Kfirst, $Klast );
$Klast_old_code = $Klast_old;
# Be sure an old K value is defined for sub store_token
$Ktoken_vars = $Kfirst;
# Check for correct sequence of token indexes...
# An error here means that sub write_line() did not correctly
# package the tokenized lines as it received them. If we
# get a fault here it has not output a continuous sequence
# of K values. Or a line of CODE may have been mis-marked as
# something else. There is no good way to continue after such an
# error.
if ( defined($last_K_out) ) {
if ( $Kfirst != $last_K_out + 1 ) {
Fault_Warn(
"Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
);
$severe_error = 1;
return ( $severe_error, $rqw_lines );
}
}
else {
# The first token should always have been given index 0 by sub
# write_line()
if ( $Kfirst != 0 ) {
Fault("Program Bug: first K is $Kfirst but should be 0");
}
}
$last_K_out = $Klast;
# Handle special lines of code
if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
# CODE_types are as follows.
# 'BL' = Blank Line
# 'VB' = Verbatim - line goes out verbatim
# 'FS' = Format Skipping - line goes out verbatim, no blanks
# 'IO' = Indent Only - only indentation may be changed
# 'NIN' = No Internal Newlines - line does not get broken
# 'HSC'=Hanging Side Comment - fix this hanging side comment
# 'BC'=Block Comment - an ordinary full line comment
# 'SBC'=Static Block Comment - a block comment which does not get
# indented
# 'SBCX'=Static Block Comment Without Leading Space
# 'VER'=VERSION statement
# '' or (undefined) - no restrictions
# Copy tokens unchanged
foreach my $KK ( $Kfirst .. $Klast ) {
$Ktoken_vars = $KK;
$self->store_token( $rLL->[$KK] );
}
next;
}
# Handle normal line..
# Define index of last token before any side comment for comma counts
my $type_end = $rLL->[$Klast_old_code]->[_TYPE_];
if ( ( $type_end eq '#' || $type_end eq 'b' )
&& $Klast_old_code > $Kfirst_old )
{
$Klast_old_code--;
if ( $rLL->[$Klast_old_code]->[_TYPE_] eq 'b'
&& $Klast_old_code > $Kfirst_old )
{
$Klast_old_code--;
}
}
# Insert any essential whitespace between lines
# if last line was normal CODE.
# Patch for rt #125012: use K_previous_code rather than '_nonblank'
# because comments may disappear.
# Note that we must do this even if --noadd-whitespace is set
if ( $last_line_type eq 'CODE' ) {
if (
is_essential_whitespace(
$last_last_nonblank_code_token,
$last_last_nonblank_code_type,
$last_nonblank_code_token,
$last_nonblank_code_type,
$rLL->[$Kfirst]->[_TOKEN_],
$rLL->[$Kfirst]->[_TYPE_],
)
)
{
$self->store_token();
}
}
#-----------------------------------------------
# Inner loop to respace tokens on a line of code
#-----------------------------------------------
# The inner loop is in a separate sub for clarity
$self->respace_tokens_inner_loop( $Kfirst, $Klast, $input_line_number );
if ( $line_of_tokens->{_ending_in_quote} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$ris_permanently_broken->{$seqno} = 1;
$self->mark_parent_containers( $seqno,
$ris_permanently_broken );
}
}
} # End line loop
# finalize data structures
$self->respace_post_loop_ops();
# Reset memory to be the new array
$self->[_rLL_] = $rLL_new;
my $Klimit;
if ( @{$rLL_new} ) { $Klimit = @{$rLL_new} - 1 }
$self->[_Klimit_] = $Klimit;
# During development, verify that the new array still looks okay.
DEVEL_MODE && $self->check_token_array();
# update the token limits of each line
( $severe_error, $rqw_lines ) = $self->resync_lines_and_tokens();
return ( $severe_error, $rqw_lines );
} ## end sub respace_tokens
sub respace_tokens_inner_loop {
my ( $self, $Kfirst, $Klast, $input_line_number ) = @_;
# Loop to copy all tokens on one line, making any spacing changes,
# while also collecting information needed by later subs.
# Given:
# $Kfirst = index of first token on this line
# $Klast = index of last token on this line
# $input_line_number = number of this line in input stream
my $type;
foreach my $KK ( $Kfirst .. $Klast ) {
# Update closure variable needed by sub store_token
$Ktoken_vars = $KK;
my $rtoken_vars = $rLL->[$KK];
# Handle a blank space ...
if ( ( $type = $rtoken_vars->[_TYPE_] ) eq 'b' ) {
# Delete it if not wanted by whitespace rules
# or we are deleting all whitespace
# Note that whitespace flag is a flag indicating whether a
# white space BEFORE the token is needed
next if ( $KK >= $Klast ); # skip terminal blank
my $Knext = $KK + 1;
if ($rOpts_freeze_whitespace) {
$self->store_token($rtoken_vars);
next;
}
my $ws = $rwhitespace_flags->[$Knext];
if ( $ws == WS_NO
|| $rOpts_delete_old_whitespace )
{
my $token_next = $rLL->[$Knext]->[_TOKEN_];
my $type_next = $rLL->[$Knext]->[_TYPE_];
my $do_not_delete = is_essential_whitespace(
$last_last_nonblank_code_token,
$last_last_nonblank_code_type,
$last_nonblank_code_token,
$last_nonblank_code_type,
$token_next,
$type_next,
);
# Note that repeated blanks will get filtered out here
next unless ($do_not_delete);
}
# make it just one character
$rtoken_vars->[_TOKEN_] = SPACE;
$self->store_token($rtoken_vars);
next;
}
my $token = $rtoken_vars->[_TOKEN_];
# Handle a sequenced token ... i.e. one of ( ) { } [ ] ? :
if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
# One of ) ] } ...
if ( $is_closing_token{$token} ) {
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
my $block_type = $rblock_type_of_seqno->{$type_sequence};
#---------------------------------------------
# check for semicolon addition in a code block
#---------------------------------------------
if ($block_type) {
# if not preceded by a ';' ..
if ( $last_nonblank_code_type ne ';' ) {
# tentatively insert a semicolon if appropriate
$self->add_phantom_semicolon($KK)
if $rOpts->{'add-semicolons'};
}
if ( $ris_sub_block->{$type_sequence}
|| $ris_asub_block->{$type_sequence} )
{
$current_sub_seqno = pop @sub_seqno_stack;
}
}
#----------------------------------------------------------
# check for addition/deletion of a trailing comma in a list
#----------------------------------------------------------
else {
# if this looks like a list ..
my $rtype_count = $rtype_count_by_seqno->{$type_sequence};
if ( !$rtype_count
|| !$rtype_count->{';'} && !$rtype_count->{'f'} )
{
# if NOT preceded by a comma..
if ( $last_nonblank_code_type ne ',' ) {
# insert a comma if requested
if (
$rOpts_add_trailing_commas
&& %trailing_comma_rules
# and...
&& (
# ... there is a comma or fat_comma
$rtype_count
&& ( $rtype_count->{','}
|| $rtype_count->{'=>'} )
# ... or exception for nested container
|| (
$rOpts_add_lone_trailing_commas
&& $is_closing_type{
$last_nonblank_code_type}
)
)
# and not preceded by '=>'
# (unusual but can occur in test files)
&& $last_nonblank_code_type ne '=>'
)
{
my $rule = $trailing_comma_rules{add};
if ( $rule && $rule->{$token} ) {
$self->add_trailing_comma( $KK, $Kfirst,
$rule->{$token} );
}
}
}
# if preceded by a comma ..
else {
# delete a trailing comma if requested
my $deleted;
if (
$rOpts_delete_trailing_commas
&& %trailing_comma_rules
&& $rtype_count
&& $rtype_count->{','}
&& ( $rOpts_delete_lone_trailing_commas
|| $rtype_count->{','} > 1
|| $rtype_count->{'=>'} )
# ignore zero-size qw commas
&& $last_nonblank_code_token
)
{
my $rule = $trailing_comma_rules{delete};
if ( $rule && $rule->{$token} ) {
$deleted =
$self->delete_trailing_comma( $KK,
$Kfirst, $rule->{$token} );
}
}
# delete a weld-interfering comma if requested
if ( !$deleted
&& $rOpts_delete_weld_interfering_commas
&& $is_closing_type{
$last_last_nonblank_code_type} )
{
$self->delete_weld_interfering_comma($KK);
}
}
}
}
}
# Opening container
else {
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( $rwant_arrow_before_seqno->{$type_sequence} ) {
# +1 means add -1 means delete previous arrow
if ( $rwant_arrow_before_seqno->{$type_sequence} > 0 ) {
$self->add_interbracket_arrow();
}
else {
$self->delete_interbracket_arrow();
$rwhitespace_flags->[$KK] = WS_NO;
}
}
# Save info for sub call arg count check
if ( $token eq '(' ) {
if (
# function(
$last_nonblank_code_type eq 'U'
|| $last_nonblank_code_type eq 'w'
# ->function(
|| ( $last_nonblank_code_type eq 'i'
&& $last_last_nonblank_code_type eq '->' )
# &function(
|| ( $last_nonblank_code_type eq 'i'
&& substr( $last_nonblank_code_token, 0, 1 ) eq
'&' )
)
{
$rsub_call_paren_info_by_seqno->{$type_sequence} = {
type_mm => $last_last_nonblank_code_type,
token_m => $last_nonblank_code_token,
};
}
}
# At a sub block, save info to cross check arg counts
elsif ( $ris_sub_block->{$type_sequence} ) {
$rK_sub_by_seqno->{$type_sequence} = $K_last_S;
if ($K_last_S_is_my) {
$ris_my_sub_by_seqno->{$type_sequence} = 1;
}
push @sub_seqno_stack, $current_sub_seqno;
$current_sub_seqno = $type_sequence;
}
elsif ( $ris_asub_block->{$type_sequence} ) {
push @sub_seqno_stack, $current_sub_seqno;
$current_sub_seqno = $type_sequence;
}
# Look for '$_[' for mismatched arg checks
elsif ($token eq '['
&& $last_nonblank_code_token eq '$_'
&& $current_sub_seqno )
{
push
@{ $rDOLLAR_underscore_by_sub_seqno->{$current_sub_seqno}
},
$type_sequence;
}
else {
## not a special opening token
}
}
}
# Modify certain tokens here for whitespace
# The following is not yet done, but could be:
# sub (x x x)
# ( $type =~ /^[wit]$/ )
elsif ( $is_wit{$type} ) {
# index() is several times faster than a regex test with \s here
## $token =~ /\s/
if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) {
# change '$ var' to '$var' etc
# change '@ ' to '@'
# Examples: <<snippets/space1.in>>
my $ord = ord( substr( $token, 1, 1 ) );
if (
# quick test for possible blank at second char
$ord > 0 && ( $ord < ORD_PRINTABLE_MIN
|| $ord > ORD_PRINTABLE_MAX )
)
{
my ( $sigil, $word ) = split /\s+/, $token, 2;
# $sigil =~ /^[\$\&\%\*\@]$/ )
if ( $is_sigil{$sigil} ) {
$token = $sigil;
$token .= $word if ( defined($word) ); # fix c104
$rtoken_vars->[_TOKEN_] = $token;
}
}
# trim identifiers of trailing blanks which can occur
# under some unusual circumstances, such as if the
# identifier 'witch' has trailing blanks on input here:
#
# sub
# witch
# () # prototype may be on new line ...
# ...
my $ord_ch = ord( substr( $token, -1, 1 ) );
if (
# quick check for possible ending space
$ord_ch > 0 && ( $ord_ch < ORD_PRINTABLE_MIN
|| $ord_ch > ORD_PRINTABLE_MAX )
)
{
$token =~ s/\s+$//g;
$rtoken_vars->[_TOKEN_] = $token;
}
}
if ( $type eq 'i' ) {
if ( $token eq '@_' && $current_sub_seqno ) {
# remember the new K of this @_; this may be
# off by 1 if a blank gets inserted before it
push
@{ $rK_AT_underscore_by_sub_seqno->{$current_sub_seqno} },
scalar( @{$rLL_new} );
}
# Remember new K of the first '$self' in a sub for -dma option
if ( $token eq '$self' && $current_sub_seqno ) {
$rK_first_self_by_sub_seqno->{$current_sub_seqno} ||=
scalar( @{$rLL_new} );
}
# Remember new K and name of blessed objects for -dma option
if (
(
$last_nonblank_code_token eq 'bless'
&& $last_nonblank_code_type eq 'k'
)
|| (
$last_last_nonblank_code_token eq 'bless'
&& $last_last_nonblank_code_type eq 'k'
&& (
$last_nonblank_code_token eq 'my'
|| $last_nonblank_code_token eq '('
)
)
)
{
push @{ $rK_bless_by_sub_seqno->{$current_sub_seqno} },
[ scalar( @{$rLL_new} ), $token ];
}
}
elsif ( $type eq 'w' ) {
if ( $last_nonblank_code_token eq 'use'
&& $last_nonblank_code_type eq 'k' )
{
if ( $token eq 'strict' ) { $self->[_saw_use_strict_] = 1 }
}
}
else {
# Could be something like '* STDERR' or '$ debug'
}
}
# handle keywords
elsif ( $type eq 'k' ) {
if ( $token eq 'return' ) {
# remember the new K of this 'return; this may be
# off by 1 if a blank gets inserted before it
push
@{ $rK_return_by_sub_seqno->{$current_sub_seqno} },
scalar( @{$rLL_new} );
}
if ( $token eq 'wantarray' ) {
push
@{ $rK_wantarray_by_sub_seqno->{$current_sub_seqno} },
scalar( @{$rLL_new} );
}
}
# handle semicolons
elsif ( $type eq ';' ) {
# Remove unnecessary semicolons, but not after bare
# blocks, where it could be unsafe if the brace is
# mis-tokenized.
if (
$rOpts->{'delete-semicolons'}
&& (
(
$last_nonblank_block_type
&& $last_nonblank_code_type eq '}'
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
|| $last_nonblank_block_type =~ /$SUB_PATTERN/
|| $last_nonblank_block_type =~ /^\w+:$/
)
)
|| $last_nonblank_code_type eq ';'
)
)
{
# This looks like a deletable semicolon, but even if a
# semicolon can be deleted it is not necessarily best to do
# so. We apply these additional rules for deletion:
# - Always ok to delete a ';' at the end of a line
# - Never delete a ';' before a '#' because it would
# promote it to a block comment.
# - If a semicolon is not at the end of line, then only
# delete if it is followed by another semicolon or closing
# token. This includes the comment rule. It may take
# two passes to get to a final state, but it is a little
# safer. For example, keep the first semicolon here:
# eval { sub bubba { ok(0) }; ok(0) } || ok(1);
# It is not required but adds some clarity.
my $ok_to_delete = 1;
if ( $KK < $Klast ) {
my $Kn = $self->K_next_nonblank($KK);
if ( defined($Kn) && $Kn <= $Klast ) {
my $next_nonblank_token_type = $rLL->[$Kn]->[_TYPE_];
$ok_to_delete = $next_nonblank_token_type eq ';'
|| $next_nonblank_token_type eq '}';
}
}
# do not delete only nonblank token in a file
else {
my $Kp = $self->K_previous_code( undef, $rLL_new );
my $Kn = $self->K_next_nonblank($KK);
$ok_to_delete = defined($Kn) || defined($Kp);
}
if ($ok_to_delete) {
$self->note_deleted_semicolon($input_line_number);
next;
}
else {
write_logfile_entry("Extra ';'\n");
}
}
}
elsif ( $type eq '->' ) {
if ( $last_nonblank_code_token eq ')' ) {
# save seqno of closing paren with arrow, ')->', git #171
# (the paren seqno is still on the stack)
my $seqno_paren = $seqno_stack{$depth_next};
if ($seqno_paren) { push @seqno_paren_arrow, $seqno_paren }
}
}
# delete repeated commas if requested
elsif ( $type eq ',' ) {
if ( $last_nonblank_code_type eq ','
&& $rOpts->{'delete-repeated-commas'} )
{
# Do not delete the leading comma of a line with a side
# comment. This could promote the side comment to a block
# comment. See test 'mangle4.in'
my $lno = 1 + $rLL->[$KK]->[_LINE_INDEX_];
if ( $KK eq $Kfirst && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
complain(
"repeated comma before side comment, not deleted\n",
$lno );
}
else {
complain( "deleted repeated ','\n", $lno );
next;
}
}
elsif ($last_nonblank_code_type eq '=>'
&& $rOpts->{'delete-repeated-commas'} )
{
my $lno = 1 + $rLL->[$KK]->[_LINE_INDEX_];
complain( "found '=>,' ... error?\n", $lno );
}
else {
# not a repeated comma type
}
# remember input line index of first comma if -wtc is used
if (%trailing_comma_rules) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno)
&& !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
)
{
$self->[_rfirst_comma_line_index_]->{$seqno} =
$rtoken_vars->[_LINE_INDEX_];
}
}
}
# check a quote for problems
elsif ( $type eq 'Q' ) {
$self->check_Q( $KK, $Kfirst, $input_line_number )
if ( $self->[_save_logfile_] );
}
# Old patch to add space to something like "x10".
# Note: This is now done in the Tokenizer, but this code remains
# for reference.
elsif ( $type eq 'n' ) {
if ( substr( $token, 0, 1 ) eq 'x' && $token =~ /^x\d+/ ) {
$token =~ s/x/x /;
$rtoken_vars->[_TOKEN_] = $token;
if (DEVEL_MODE) {
Fault(<<EOM);
Near line $input_line_number, Unexpected need to split a token '$token' - this should now be done by the Tokenizer
EOM
}
}
}
elsif ( $type eq '=>' ) {
if ( $last_nonblank_code_type eq '=>'
&& $rOpts->{'delete-repeated-commas'} )
{
# Check for repeated '=>'s
# Note that ',=>' is useful and called a winking fat comma
# Do not delete the leading fat comma of a line with a side
# comment. This could promote the side comment to a block
# comment. See test 'mangle4.in'
my $lno = 1 + $rLL->[$KK]->[_LINE_INDEX_];
if ( $KK eq $Kfirst && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
complain(
"-repeated '=>' before side comment, not deleted\n",
$lno );
}
else {
complain( "deleted repeated '=>'\n", $lno );
next;
}
}
# remember input line index of first '=>' if -wtc is used
if (%trailing_comma_rules) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno)
&& !defined( $self->[_rfirst_comma_line_index_]->{$seqno} )
)
{
$self->[_rfirst_comma_line_index_]->{$seqno} =
$rtoken_vars->[_LINE_INDEX_];
}
}
}
# check for a qw quote
elsif ( $type eq 'q' ) {
# Trim spaces from right of qw quotes. Also trim from the left for
# safety (the tokenizer should have done this).
# To avoid trimming qw quotes use -ntqw; this causes the
# tokenizer to set them as type 'Q' instead of 'q'.
$token =~ s/^ \s+ | \s+ $//gx;
$rtoken_vars->[_TOKEN_] = $token;
if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
$self->note_embedded_tab($input_line_number);
}
if ( $rwhitespace_flags->[$KK] == WS_YES
&& @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
$self->store_token();
}
$self->store_token($rtoken_vars);
next;
}
# Remove space after '<<'. Note that perl may use a space after
# '<<' to guess tokenization for numeric targets. See git #174.
elsif ( $type eq 'h' ) {
if ( index( $token, SPACE ) > 0 || index( $token, "\t" ) > 0 ) {
if ( $token =~ /^ (\<\<\~?) \s+ ([^\d].*) $/x ) {
$token = $1 . $2;
$rtoken_vars->[_TOKEN_] = $token;
}
}
}
elsif ( $type eq 'S' ) {
# Trim spaces in sub definitions
# save the NEW index of this token which will normally
# be @{$rLL_new} plus 1 because a blank is usually inserted
# ahead of it. The user routine will back up if necessary.
# Note that an isolated prototype starting on new line will
# be marked as 'S' but start with '(' and must be skipped.
if ( substr( $token, 0, 1 ) ne '(' ) {
$K_last_S = @{$rLL_new} + 1;
# also, remember if this is a 'my' sub
$K_last_S_is_my = $last_nonblank_code_type eq 'k'
&& (
$last_nonblank_code_token eq 'my'
|| ( $last_nonblank_code_token eq 'sub'
&& $last_last_nonblank_code_type eq 'k'
&& $last_last_nonblank_code_token eq 'my' )
);
}
# Note: an asub with prototype like this will come this way
# and be partially treated as a named sub
# sub () {
# -spp = 0 : no space before opening prototype paren
# -spp = 1 : stable (follow input spacing)
# -spp = 2 : always space before opening prototype paren
if ( !defined($rOpts_space_prototype_paren)
|| $rOpts_space_prototype_paren == 1 )
{
## default: stable
}
elsif ( $rOpts_space_prototype_paren == 0 ) {
$token =~ s/\s+\(/\(/;
}
elsif ( $rOpts_space_prototype_paren == 2 ) {
$token =~ s/\(/ (/;
}
else {
## should have been caught with the integer range check
## continue with the default
DEVEL_MODE && Fault(<<EOM);
unexpected integer value space-prototype-paren=$rOpts_space_prototype_paren
EOM
}
# one space max, and no tabs
$token =~ s/\s+/ /g;
$rtoken_vars->[_TOKEN_] = $token;
$self->[_ris_special_identifier_token_]->{$token} = 'sub';
}
# and trim spaces in package statements (added for c250)
elsif ( $type eq 'P' ) {
# clean up spaces in package identifiers, like
# "package Bob::Dog;"
if ( $token =~ s/\s+/ /g ) {
$rtoken_vars->[_TOKEN_] = $token;
$self->[_ris_special_identifier_token_]->{$token} = 'package';
}
# remember the new K of this package; this may be
# off by 1 if a blank gets inserted before it
push @{$rK_package_list}, scalar( @{$rLL_new} );
}
# change 'LABEL :' to 'LABEL:'
elsif ( $type eq 'J' ) {
$token =~ s/\s+//g;
$rtoken_vars->[_TOKEN_] = $token;
}
else {
# no special processing for this token type
}
# Store this token with possible previous blank
if ( $rwhitespace_flags->[$KK] == WS_YES
&& @{$rLL_new}
&& $rLL_new->[-1]->[_TYPE_] ne 'b'
&& $rOpts_add_whitespace )
{
$self->store_token();
}
$self->store_token($rtoken_vars);
} # End token loop
return;
} ## end sub respace_tokens_inner_loop
sub respace_post_loop_ops {
my ($self) = @_;
# We have just completed the 'respace' operation, in which we have made
# a pass through all tokens and set the whitespace between tokens to be
# according to user settings. The new tokens have been placed in the new
# token list '$rLL_new'. Now we have to go through this new list and
# define some indexes which allow quick access into it.
return unless ( @{$rLL_new} );
# Setup array for finding the next sequence number after any token
my @K_next_seqno_by_K;
my $K_last = 0;
foreach my $K (@K_sequenced_token_list) {
push @K_next_seqno_by_K, ($K) x ( $K - $K_last );
$K_last = $K;
}
# Note: here is the slow way to do the above loop (100 ms)
## foreach my $KK ( $K_last .. $K - 1 ) {
## $K_next_seqno_by_K[$KK] = $K;
## }
# This is faster (63 ms)
## my @q = ( $K_last .. $K - 1 );
## @K_next_seqno_by_K[@q] = ($K) x scalar(@q);
# The push method above is fastest, at 37 ms in my benchmark.
$self->[_rK_next_seqno_by_K_] = \@K_next_seqno_by_K;
$self->[_rK_sequenced_token_list_] = \@K_sequenced_token_list;
# Verify that arrays @K_sequenced_token_list and @{$rSS} are parallel
# arrays, meaning that they have a common array index 'I'. This index maybe
# be found by seqno with rI_container and rI_closing.
if (DEVEL_MODE) {
my $num_rSS = @{ $self->[_rSS_] };
my $num_Kseq = @K_sequenced_token_list;
# If this error occurs, we have gained or lost one or more of the
# sequenced tokens received from the tokenizer. This should never
# happen.
if ( $num_rSS != $num_Kseq ) {
Fault(<<EOM);
num_rSS= $num_rSS != num_Kseq=$num_Kseq
EOM
}
}
# Find and remember lists by sequence number
foreach my $seqno ( keys %{$K_opening_container} ) {
my $K_opening = $K_opening_container->{$seqno};
next unless defined($K_opening);
# code errors may leave undefined closing tokens
my $K_closing = $K_closing_container->{$seqno};
next unless defined($K_closing);
my $lx_open = $rLL_new->[$K_opening]->[_LINE_INDEX_];
my $lx_close = $rLL_new->[$K_closing]->[_LINE_INDEX_];
my $line_diff = $lx_close - $lx_open;
$ris_broken_container->{$seqno} = $line_diff;
# See if this is a list
my $is_list;
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ($rtype_count) {
# We will define a list to be a container with one or more commas
# and no semicolons.
my $token_opening = $rLL_new->[$K_opening]->[_TOKEN_];
if ( $rtype_count->{';'} ) {
# Not a list .. check for possible error. For now, just see if
# this ';' is in a '(' or '[' container. Checking type '{' is
# tricky and not done yet.
if ( $token_opening eq '(' || $token_opening eq '[' ) {
my $lno = $rLL_new->[$K_opening]->[_LINE_INDEX_] + 1;
complain(<<EOM);
Unexpected ';' in container beginning with '$token_opening' at line $lno
EOM
}
}
# Type 'f' is semicolon in a c-style 'for' statement
elsif ( $rtype_count->{'f'} ) {
## not a list
}
elsif ( $rtype_count->{','} || $rtype_count->{'=>'} ) {
# has commas but no semicolons
$is_list = 1;
# We need to do one more check for a parenthesized list:
# At an opening paren following certain tokens, such as 'if',
# we do not want to format the contents as a list.
if ( $token_opening eq '(' ) {
my $Kp = $self->K_previous_code( $K_opening, $rLL_new );
if ( defined($Kp) ) {
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
$is_list =
$type_p eq 'k'
? !$is_nonlist_keyword{$token_p}
: !$is_nonlist_type{$type_p};
}
}
}
else {
## no commas or semicolons - not a list
}
}
# Look for a block brace marked as uncertain. If the tokenizer thinks
# its guess is uncertain for the type of a brace following an unknown
# bareword then it adds a trailing space as a signal. We can fix the
# type here now that we have had a better look at the contents of the
# container. This fixes case b1085. To find the corresponding code in
# Tokenizer.pm search for 'b1085' with an editor.
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && substr( $block_type, -1, 1 ) eq SPACE ) {
# Always remove the trailing space
$block_type =~ s/\s+$//;
# Try to filter out parenless sub calls
my $Knn1 = $self->K_next_nonblank( $K_opening, $rLL_new );
my $Knn2;
if ( defined($Knn1) ) {
$Knn2 = $self->K_next_nonblank( $Knn1, $rLL_new );
}
my $type_nn1 = defined($Knn1) ? $rLL_new->[$Knn1]->[_TYPE_] : 'b';
my $type_nn2 = defined($Knn2) ? $rLL_new->[$Knn2]->[_TYPE_] : 'b';
# if ( $type_nn1 =~ /^[wU]$/ && $type_nn2 =~ /^[wiqQGCZ]$/ ) {
if ( $wU{$type_nn1} && $wiq{$type_nn2} ) {
$is_list = 0;
}
# Convert to a hash brace if it looks like it holds a list
if ($is_list) {
$block_type = EMPTY_STRING;
}
$rblock_type_of_seqno->{$seqno} = $block_type;
}
# Handle a list container
if ( $is_list && !$block_type ) {
$ris_list_by_seqno->{$seqno} = $seqno;
# Update parent container properties
my $depth = 0;
my $rparent_seqno_list = $self->get_parent_containers($seqno);
foreach my $seqno_parent ( @{$rparent_seqno_list} ) {
$depth++;
# for $rhas_list we need to save the minimum depth
if ( !$rhas_list->{$seqno_parent}
|| $rhas_list->{$seqno_parent} > $depth )
{
$rhas_list->{$seqno_parent} = $depth;
}
if ($line_diff) {
$rhas_broken_list->{$seqno_parent} = 1;
# Patch1: We need to mark broken lists with non-terminal
# line-ending commas for the -bbx=2 parameter. This insures
# that the list will stay broken. Otherwise the flag
# -bbx=2 can be unstable. This fixes case b789 and b938.
# Patch2: Updated to also require either one fat comma or
# one more line-ending comma. Fixes cases b1069 b1070
# b1072 b1076.
if (
$rlec_count_by_seqno->{$seqno}
&& ( $rlec_count_by_seqno->{$seqno} > 1
|| $rtype_count_by_seqno->{$seqno}->{'=>'} )
)
{
$rhas_broken_list_with_lec->{$seqno_parent} = 1;
}
}
}
}
# Handle code blocks ...
# The -lp option needs to know if a container holds a code block
elsif ( $block_type && $rOpts_line_up_parentheses ) {
# Update parent container properties
my $rparent_seqno_list = $self->get_parent_containers($seqno);
foreach my $seqno_parent ( @{$rparent_seqno_list} ) {
$rhas_code_block->{$seqno_parent} = 1;
$rhas_broken_code_block->{$seqno_parent} = $line_diff;
}
}
else {
# nothing special to do for this container token
}
}
# Find containers with ternaries, needed for -lp formatting.
foreach my $seqno ( keys %{$K_opening_ternary} ) {
# Update parent container properties
$self->mark_parent_containers( $seqno, $rhas_ternary );
}
# Turn off -lp for containers with here-docs with text within a container,
# since they have their own fixed indentation. Fixes case b1081.
if ($rOpts_line_up_parentheses) {
foreach my $seqno ( keys %K_first_here_doc_by_seqno ) {
my $Kh = $K_first_here_doc_by_seqno{$seqno};
my $Kc = $K_closing_container->{$seqno};
my $line_Kh = $rLL_new->[$Kh]->[_LINE_INDEX_];
my $line_Kc = $rLL_new->[$Kc]->[_LINE_INDEX_];
next if ( $line_Kh == $line_Kc );
$ris_excluded_lp_container->{$seqno} = 1;
}
}
# Set a flag to turn off -cab=3 in complex structures. Otherwise,
# instability can occur. When it is overridden the behavior of the closest
# match, -cab=2, will be used instead. This fixes cases b1096 b1113.
if ( $rOpts_comma_arrow_breakpoints == 3 ) {
foreach my $seqno ( keys %{$K_opening_container} ) {
my $rtype_count = $rtype_count_by_seqno->{$seqno};
next unless ( $rtype_count && $rtype_count->{'=>'} );
# override -cab=3 if this contains a sub-list
if ( !defined( $roverride_cab3->{$seqno} ) ) {
if ( $rhas_list->{$seqno} ) {
$roverride_cab3->{$seqno} = 2;
}
# or if this is a sub-list of its parent container
else {
my $seqno_parent = $rparent_of_seqno->{$seqno};
if ( defined($seqno_parent)
&& $ris_list_by_seqno->{$seqno_parent} )
{
$roverride_cab3->{$seqno} = 2;
}
}
}
}
}
# Search for chains of method calls of the form (git #171)
# )->xxx( )->xxx( )->
# We have previously saved the seqno of all ')->' combinations
my $in_chain_seqno = 0;
while ( my $seqno = shift @seqno_paren_arrow ) {
# ) -> func (
# ) -> func (
# $Kc--^ ^--$K_test
my $Kc = $K_closing_container->{$seqno};
my $K_arrow = $self->K_next_nonblank( $Kc, $rLL_new );
my $K_func = $self->K_next_nonblank( $K_arrow, $rLL_new );
my $K_test = $self->K_next_nonblank( $K_func, $rLL_new );
last if ( !defined($K_test) );
# ignore index operation like ')->{' or ')->[' and end any chain
my $tok = $rLL_new->[$K_func]->[_TOKEN_];
if ( $tok eq '[' || $tok eq '{' ) { $in_chain_seqno = 0; next }
# mark seqno of parens which are part of a call chain
my $seqno_start = $in_chain_seqno ? $in_chain_seqno : $seqno;
$rseqno_arrow_call_chain_start->{$seqno} = $seqno_start;
# save a list of the arrows, needed to set line breaks
push @{ $rarrow_call_chain->{$seqno_start} }, $K_arrow;
# See if this chain continues
if ( @seqno_paren_arrow
&& defined($K_test)
&& $rLL_new->[$K_test]->[_TOKEN_] eq '('
&& $rLL_new->[$K_test]->[_TYPE_SEQUENCE_] eq $seqno_paren_arrow[0] )
{
$in_chain_seqno ||= $seqno;
}
else { $in_chain_seqno = 0 }
} ## end while ( my $seqno = shift...)
# For efficiency, remove chains with length < 2
foreach my $seqno ( keys %{$rseqno_arrow_call_chain_start} ) {
my $seqno_start = $rseqno_arrow_call_chain_start->{$seqno};
if ( @{ $rarrow_call_chain->{$seqno_start} } < 2 ) {
delete $rseqno_arrow_call_chain_start->{$seqno};
delete $rarrow_call_chain->{$seqno_start};
}
}
return;
} ## end sub respace_post_loop_ops
sub store_token {
my ( $self, ($item) ) = @_;
# Store one token during respace operations
# Given:
# $item =
# if defined => reference to a token to be stored
# if not defined => make and store a blank space
# NOTE: this sub is called once per token so coding efficiency is critical.
# If no arg, then make and store a blank space
if ( !$item ) {
# - Never start the array with a space, and
# - Never store two consecutive spaces
if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] ne 'b' ) {
# Note that the level and ci_level of newly created spaces should
# be the same as the previous token. Otherwise the coding for the
# -lp option can create a blinking state in some rare cases.
# (see b1109, b1110).
$item = [];
$item->[_TYPE_] = 'b';
$item->[_TOKEN_] = SPACE;
$item->[_TYPE_SEQUENCE_] = EMPTY_STRING;
$item->[_LINE_INDEX_] = $rLL_new->[-1]->[_LINE_INDEX_];
$item->[_LEVEL_] = $rLL_new->[-1]->[_LEVEL_];
}
else { return }
}
# The next multiple assignment statements are significantly faster than
# doing them one-by-one.
my (
$type,
$token,
$type_sequence,
) = @{$item}[
_TYPE_,
_TOKEN_,
_TYPE_SEQUENCE_,
];
# Set the token length. Later it may be adjusted again if phantom or
# ignoring side comment lengths. It is always okay to calculate the length
# with $length_function->() if it is defined, but it is extremely slow so
# we avoid it and use the builtin length() for printable ascii tokens.
# Note: non-printable ascii characters (like tab) may get different lengths
# by the two methods, so we have to use $length_function for them.
my $token_length =
( $length_function
&& !$is_ascii_type{$type}
&& $token =~ /[[:^ascii:][:^print:]]/ )
? $length_function->($token)
: length($token);
# handle blanks
if ( $type eq 'b' ) {
# Do not output consecutive blanks. This situation should have been
# prevented earlier, but it is worth checking because later routines
# make this assumption.
if ( @{$rLL_new} && $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
return;
}
}
# handle comments
elsif ( $type eq '#' ) {
# trim comments if necessary
my $ord = ord( substr( $token, -1, 1 ) );
if (
$ord > 0
&& ( $ord < ORD_PRINTABLE_MIN
|| $ord > ORD_PRINTABLE_MAX )
&& $token =~ s/\s+$//
)
{
$token_length =
$length_function ? $length_function->($token) : length($token);
$item->[_TOKEN_] = $token;
}
my $ignore_sc_length = $rOpts_ignore_side_comment_lengths;
# Ignore length of '## no critic' comments even if -iscl is not set
if ( !$ignore_sc_length
&& !$rOpts_ignore_perlcritic_comments
&& $token_length > 10
&& substr( $token, 1, 1 ) eq '#'
&& $token =~ /^##\s*no\s+critic\b/ )
{
# Is it a side comment or a block comment?
if ( $Ktoken_vars > $Kfirst_old ) {
# This is a side comment. If we do not ignore its length, and
# -iscl has not been set, then the line could be broken and
# perlcritic will complain. So this is essential:
$ignore_sc_length ||= 1;
# It would be a good idea to also make this behave like a
# static side comment, but this is not essential and would
# change existing formatting. So we will leave it to the user
# to set -ssc if desired.
}
else {
# This is a full-line (block) comment.
# It would be a good idea to make this behave like a static
# block comment, but this is not essential and would change
# existing formatting. So we will leave it to the user to
# set -sbc if desired
}
}
# Set length of ignored side comments as just 1
if ( $ignore_sc_length && ( !$CODE_type || $CODE_type eq 'HSC' ) ) {
$token_length = 1;
}
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$self->[_rblank_and_comment_count_]->{$seqno} += 1
if ( $CODE_type eq 'BC' );
if ( !$ris_permanently_broken->{$seqno} ) {
$ris_permanently_broken->{$seqno} = 1;
$self->mark_parent_containers( $seqno,
$ris_permanently_broken );
}
}
}
# handle non-blanks and non-comments
else {
my $block_type;
# check for a sequenced item (i.e., container or ?/:)
if ($type_sequence) {
# This will be the index of this item in the new array
my $KK_new = @{$rLL_new};
# remember new K of sequence tokens
push @K_sequenced_token_list, $KK_new;
if ( $is_opening_token{$token} ) {
$K_opening_container->{$type_sequence} = $KK_new;
$block_type = $rblock_type_of_seqno->{$type_sequence};
# Fix for case b1100: Count a line ending in ', [' as having
# a line-ending comma. Otherwise, these commas can be hidden
# with something like --opening-square-bracket-right
if ( $last_nonblank_code_type eq ','
&& $Ktoken_vars == $Klast_old_code
&& $Ktoken_vars > $Kfirst_old )
{
$rlec_count_by_seqno->{$type_sequence}++;
}
if ( $last_nonblank_code_type eq '='
|| $last_nonblank_code_type eq '=>' )
{
$ris_assigned_structure->{$type_sequence} =
$last_nonblank_code_type;
}
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
$seqno_parent = SEQ_ROOT unless defined($seqno_parent);
push @{ $rchildren_of_seqno->{$seqno_parent} }, $type_sequence;
$rparent_of_seqno->{$type_sequence} = $seqno_parent;
$seqno_stack{$depth_next} = $type_sequence;
$K_old_opening_by_seqno{$type_sequence} = $Ktoken_vars;
$depth_next++;
if ( $depth_next > $depth_next_max ) {
$depth_next_max = $depth_next;
}
}
elsif ( $is_closing_token{$token} ) {
$K_closing_container->{$type_sequence} = $KK_new;
$block_type = $rblock_type_of_seqno->{$type_sequence};
# Do not include terminal commas in counts
if ( $last_nonblank_code_type eq ','
|| $last_nonblank_code_type eq '=>' )
{
$rtype_count_by_seqno->{$type_sequence}
->{$last_nonblank_code_type}--;
if ( $Ktoken_vars == $Kfirst_old
&& $last_nonblank_code_type eq ','
&& $rlec_count_by_seqno->{$type_sequence} )
{
$rlec_count_by_seqno->{$type_sequence}--;
}
# set flag to retain trailing comma breaks (b1493, c416)
# length check needed to ignore phantom commas (b1496)
if ( $last_nonblank_code_type eq ','
&& $trailing_comma_break_rules{$token}
&& length($last_nonblank_code_token) )
{
my $rule = $trailing_comma_break_rules{$token};
my ( $letter, $paren_flag ) = @{$rule};
my $match;
if ( $letter eq 'b' ) {
$match = $Ktoken_vars == $Kfirst_old;
}
elsif ( $letter eq 'm' ) {
$match = $K_old_opening_by_seqno{$type_sequence} <
$Kfirst_old;
}
elsif ( $letter eq '1' || $letter eq '*' ) {
$match = 1;
}
else {
## shouldn't happen - treat as 'b' for now
DEVEL_MODE && Fault(<<EOM);
unexpected option '$letter' for --trailing-comma-break-flag at token '$token'
EOM
$match = $Ktoken_vars == $Kfirst_old;
}
if ( $match && $paren_flag && $token eq ')' ) {
$match &&=
$self->match_paren_control_flag( $type_sequence,
$paren_flag, $rLL_new );
}
if ($match) {
$self->[_rbreak_container_]->{$type_sequence} = 1;
}
}
}
# Update the stack...
$depth_next--;
}
else {
# For ternary, note parent but do not include as child
my $seqno_parent = $seqno_stack{ $depth_next - 1 };
$seqno_parent = SEQ_ROOT unless defined($seqno_parent);
$rparent_of_seqno->{$type_sequence} = $seqno_parent;
# These are not yet used but could be useful
if ( $token eq '?' ) {
$K_opening_ternary->{$type_sequence} = $KK_new;
}
elsif ( $token eq ':' ) {
$K_closing_ternary->{$type_sequence} = $KK_new;
}
else {
# We really shouldn't arrive here, just being cautious:
# The only sequenced types output by the tokenizer are the
# opening & closing containers and the ternary types. Each
# of those was checked above. So we would only get here
# if the tokenizer has been changed to mark some other
# tokens with sequence numbers.
if (DEVEL_MODE) {
Fault(
"Unexpected token type with sequence number: type='$type', seqno='$type_sequence'"
);
}
}
}
}
# Remember the most recent two non-blank, non-comment tokens.
# NOTE: the phantom semicolon code may change the output stack
# without updating these values. Phantom semicolons are considered
# the same as blanks for now, but future needs might change that.
# See the related note in sub 'add_phantom_semicolon'.
$last_last_nonblank_code_type = $last_nonblank_code_type;
$last_last_nonblank_code_token = $last_nonblank_code_token;
$last_nonblank_code_type = $type;
$last_nonblank_code_token = $token;
$last_nonblank_block_type = $block_type;
# count selected types
if ( $is_counted_type{$type} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ( defined($seqno) ) {
$rtype_count_by_seqno->{$seqno}->{$type}++;
# Count line-ending commas for -bbx
if ( $type eq ',' && $Ktoken_vars == $Klast_old_code ) {
$rlec_count_by_seqno->{$seqno}++;
}
# Remember index of first here doc target
if ( $type eq 'h' && !$K_first_here_doc_by_seqno{$seqno} ) {
my $KK_new = @{$rLL_new};
$K_first_here_doc_by_seqno{$seqno} = $KK_new;
# the here doc which follows makes the container broken
if ( !$ris_permanently_broken->{$seqno} ) {
$ris_permanently_broken->{$seqno} = 1;
$self->mark_parent_containers( $seqno,
$ris_permanently_broken );
}
}
}
}
}
# cumulative length is the length sum including this token
$cumulative_length += $token_length;
$item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
$item->[_TOKEN_LENGTH_] = $token_length;
# For reference, here is how to get the parent sequence number.
# This is not used because it is slower than finding it on the fly
# in sub parent_seqno_by_K:
# my $seqno_parent =
# $type_sequence && $is_opening_token{$token}
# ? $seqno_stack{ $depth_next - 2 }
# : $seqno_stack{ $depth_next - 1 };
# my $KK = @{$rLL_new};
# $rseqno_of_parent_by_K->{$KK} = $seqno_parent;
# and finally, add this item to the new array
push @{$rLL_new}, $item;
return;
} ## end sub store_token
sub add_phantom_semicolon {
my ( $self, $KK ) = @_;
# The token at old index $KK is a closing block brace, and not preceded
# by a semicolon. Before we push it onto the new token list, we may
# want to add a phantom semicolon which can be activated if the the
# block is broken on output.
# We are only adding semicolons for certain block types
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return unless ($type_sequence);
my $block_type = $rblock_type_of_seqno->{$type_sequence};
return unless ($block_type);
return
unless ( $ok_to_add_semicolon_for_block_type{$block_type}
|| $block_type =~ /^(sub|package)/
|| $block_type =~ /^\w+\:$/ );
# Find the most recent token in the new token list
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) ); # shouldn't happen except for bad input
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
my $type_sequence_p = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# Do not add a semicolon if...
return
if (
# it would follow a comment (and be isolated)
$type_p eq '#'
# it follows a code block ( because they are not always wanted
# there and may add clutter)
|| $type_sequence_p && $rblock_type_of_seqno->{$type_sequence_p}
# it would follow a label
|| $type_p eq 'J'
# it would be inside a 'format' statement (and cause syntax error)
|| ( $type_p eq 'k'
&& $token_p =~ /format/ )
);
# Do not add a semicolon if it would impede a weld with an immediately
# following closing token...like this
# { ( some code ) }
# ^--No semicolon can go here
# look at the previous token... note use of the _NEW rLL array here,
# but sequence numbers are invariant.
my $seqno_inner = $rLL_new->[$Kp]->[_TYPE_SEQUENCE_];
# If it is also a CLOSING token we have to look closer...
if (
$seqno_inner
&& $is_closing_token{$token_p}
# we only need to look if there is just one inner container..
&& defined( $rchildren_of_seqno->{$type_sequence} )
&& @{ $rchildren_of_seqno->{$type_sequence} } == 1
)
{
# Go back and see if the corresponding two OPENING tokens are also
# together. Note that we are using the OLD K indexing here:
my $K_outer_opening = $K_old_opening_by_seqno{$type_sequence};
if ( defined($K_outer_opening) ) {
my $K_nxt = $self->K_next_nonblank($K_outer_opening);
if ( defined($K_nxt) ) {
my $seqno_nxt = $rLL->[$K_nxt]->[_TYPE_SEQUENCE_];
# Is the next token after the outer opening the same as
# our inner closing (i.e. same sequence number)?
# If so, do not insert a semicolon here.
return if ( $seqno_nxt && $seqno_nxt == $seqno_inner );
}
}
}
# We will insert an empty semicolon here as a placeholder. Later, if
# it becomes the last token on a line, we will bring it to life. The
# advantage of doing this is that (1) we just have to check line
# endings, and (2) the phantom semicolon has zero width and therefore
# won't cause needless breaks of one-line blocks.
my $Ktop = -1;
if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
&& $want_left_space{';'} == WS_NO )
{
# convert the blank into a semicolon..
# be careful: we are working on the new stack top
# on a token which has been stored.
my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', SPACE );
# Convert the existing blank to:
# a phantom semicolon for one_line_block option = 0 or 1
# a real semicolon for one_line_block option = 2
my $tok = EMPTY_STRING;
my $len_tok = 0;
if ( $rOpts_one_line_block_semicolons == 2 ) {
$tok = ';';
$len_tok = 1;
}
$rLL_new->[$Ktop]->[_TOKEN_] = $tok;
$rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = $len_tok;
$rLL_new->[$Ktop]->[_TYPE_] = ';';
$self->[_rtype_count_by_seqno_]->{$type_sequence}->{';'}++;
# NOTE: we are changing the output stack without updating variables
# $last_nonblank_code_type, etc. Future needs might require that
# those variables be updated here. For now, it seems ok to skip
# this.
# Then store a new blank
$self->store_token($rcopy);
}
else {
# Patch for issue c078: keep line indexes in order. If the top
# token is a space that we are keeping (due to '-wls=';') then
# we have to check that old line indexes stay in order.
# In very rare
# instances in which side comments have been deleted and converted
# into blanks, we may have filtered down multiple blanks into just
# one. In that case the top blank may have a higher line number
# than the previous nonblank token. Although the line indexes of
# blanks are not really significant, we need to keep them in order
# in order to pass error checks.
if ( $rLL_new->[$Ktop]->[_TYPE_] eq 'b' ) {
my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
if ( $new_top_ix < $old_top_ix ) {
$rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', EMPTY_STRING );
$self->store_token($rcopy);
}
return;
} ## end sub add_phantom_semicolon
sub delay_trailing_comma_op {
my ( $self, $if_add, $stable_flag ) = @_;
# Given:
# $if_add = true for add comma operation, false for delete
# $stable_flag = true if -btct setting makes this stable
# Returns:
# true if a trailing comma operation should be skipped
# false otherwise
# This can prevent unwanted path-dependent formatting when both
# line breaks are changing and we are only adding or deleting
# commas, but not both. See git #156
# Get user setting, if any
my $delay = $rOpts->{'delay-trailing-comma-operations'};
# Set default if not defined:
# - if deleting: delay always ok
# - if adding: delay ok unless breaks will be stabilized by -btct setting
# Explanation:
# - deleting can be irreversible, so it is safest to delay
# - adding, along with -btct, can save original line breaks which would
# be lost otherwise, so it may be best not to delay.
if ( !defined($delay) ) {
$delay = $if_add ? !$stable_flag : 1;
}
return if ( !$delay );
# We must be at the first of multiple iterations for a delay
my $it = Perl::Tidy::get_iteration_count();
my $max_iterations = $rOpts->{'iterations'};
if ( $it == 1 && $max_iterations > 1 ) {
# if so, set flag to request another iteration
$self->[_want_second_iteration_] = 1;
return 1;
}
return;
} ## end sub delay_trailing_comma_op
my %is_b_i_h;
BEGIN {
my @q = qw( b i h );
@is_b_i_h{@q} = (1) x scalar(@q);
}
sub add_trailing_comma {
# Implement the --add-trailing-commas flag to the line end before index $KK:
my ( $self, $KK, $Kfirst, $trailing_comma_add_rule ) = @_;
# Input parameter:
# $KK = index of closing token in old ($rLL) token list
# which starts a new line and is not preceded by a comma
# $Kfirst = index of first token on the current line of input tokens
# $trailing_comma_add_rule = user control flags for adding trailng commas
# For example, we might want to add a comma here:
# bless {
# _name => $name,
# _price => $price,
# _rebate => $rebate <------ location of possible bare comma
# }, $pkg;
# ^-------------------closing token at index $KK on new line
# Do not add a comma if it would follow a comment
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
return if ( $type_p eq '#' );
return unless ($trailing_comma_add_rule);
my ( $trailing_comma_style, $paren_flag, $stable_flag ) =
@{$trailing_comma_add_rule};
# see if the user wants a trailing comma here
my $match =
$self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
$trailing_comma_style, $paren_flag, $stable_flag, 1 );
# Do not add if this would cause excess line length and possible
# instability. This is b1458 fix method 1. This is more general than fix
# method 2, below, which also worked. So this is not needed for b1458 but
# re-activated and updated for b1495.
if ( $match
&& $rOpts_delete_trailing_commas
&& $KK > 0 )
{
if ( !$stable_flag && $is_b_i_h{$trailing_comma_style} ) {
my $line_index = $rLL->[ $KK - 1 ]->[_LINE_INDEX_];
my $rlines = $self->[_rlines_];
my $line_of_tokens = $rlines->[$line_index];
my $input_line = $line_of_tokens->{_line_text};
my $len =
$length_function
? $length_function->($input_line) - 1
: length($input_line) - 1;
my $new_len = $want_left_space{','} ? $len + 2 : $len + 1;
my $level = $rLL->[$Kfirst]->[_LEVEL_];
my $max_len = $maximum_line_length_at_level[$level];
if ( $new_len > $max_len ) {
$match = 0;
}
}
}
# If so, and not delayed, add a comma
if ( $match && !$self->delay_trailing_comma_op( 1, $stable_flag ) ) {
# any blank after the comma will be added before the closing paren,
# below
$self->store_new_token( ',', ',', $Kp );
}
return;
} ## end sub add_trailing_comma
sub delete_trailing_comma {
my ( $self, $KK, $Kfirst, $trailing_comma_delete_rule ) = @_;
# Apply the --delete-trailing-commas flag to the comma before index $KK
# Input parameter:
# $KK = index of a closing token in OLD ($rLL) token list
# which is preceded by a comma on the same line.
# $Kfirst = index of first token on the current line of input tokens
# $delete_option = user control flag
# Returns true if the comma was deleted
# For example, we might want to delete this comma:
# my @asset = ("FASMX", "FASGX", "FASIX",);
# | |^--------token at index $KK
# | ^------comma of interest
# ^-------------token at $Kfirst
# Verify that the previous token is a comma. Note that we are working in
# the new token list $rLL_new.
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
# there must be a '#' between the ',' and closing token; give up.
return;
}
# Do not delete commas when formatting under stress to avoid instability.
# This fixes b1389, b1390, b1391, b1392. The $high_stress_level has
# been found to work well for trailing commas.
if ( $rLL_new->[$Kp]->[_LEVEL_] >= $high_stress_level ) {
return;
}
return unless ($trailing_comma_delete_rule);
my ( $trailing_comma_style, $paren_flag, $stable_flag ) =
@{$trailing_comma_delete_rule};
# See if the user wants this trailing comma
my $match =
$self->match_trailing_comma_rule( $KK, $Kfirst, $Kp,
$trailing_comma_style, $paren_flag, $stable_flag, 0 );
# Patch: the --noadd-whitespace flag can cause instability in complex
# structures. In this case do not delete the comma. Fixes b1409.
if ( !$match && !$rOpts_add_whitespace ) {
my $Kn = $self->K_next_nonblank($KK);
if ( defined($Kn) ) {
my $type_n = $rLL->[$Kn]->[_TYPE_];
if ( $type_n ne ';' && $type_n ne '#' ) { return }
}
}
# b1458 fix method 2: do not remove a comma after a leading brace type 'R'
# since it is under stress and could become unstable. This is a more
# specific fix but the logic is cleaner than method 1.
if ( !$match
&& $rOpts_add_trailing_commas
&& $rLL->[$Kfirst]->[_TYPE_] eq 'R' )
{
# previous old token should be the comma..
my $Kp_old = $self->K_previous_nonblank( $KK, $rLL );
if ( defined($Kp_old)
&& $Kp_old > $Kfirst
&& $rLL->[$Kp_old]->[_TYPE_] eq ',' )
{
# if the comma follows the first token of the line ..
my $Kpp_old = $self->K_previous_nonblank( $Kp_old, $rLL );
if ( defined($Kpp_old) && $Kpp_old eq $Kfirst ) {
# do not delete it
$match = 1;
}
}
}
# If no match and not delayed
if ( !$match && !$self->delay_trailing_comma_op( 0, $stable_flag ) ) {
# delete it
return $self->unstore_last_nonblank_token(',');
}
return;
} ## end sub delete_trailing_comma
sub delete_weld_interfering_comma {
my ( $self, $KK ) = @_;
# Apply the flag '--delete-weld-interfering-commas' to the comma
# before index $KK
# Input parameter:
# $KK = index of a closing token in OLD ($rLL) token list
# which is preceded by a comma on the same line.
# Returns true if the comma was deleted
# For example, we might want to delete this comma:
# my $tmpl = { foo => {no_override => 1, default => 42}, };
# || ^------$KK
# |^---$Kp
# $Kpp---^
#
# Note that:
# index $KK is in the old $rLL array, but
# indexes $Kp and $Kpp are in the new $rLL_new array.
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return unless ($type_sequence);
# Find the previous token and verify that it is a comma.
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
if ( $rLL_new->[$Kp]->[_TYPE_] ne ',' ) {
# it is not a comma, so give up ( it is probably a '#' )
return;
}
# This must be the only comma in this list
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
return
unless ( defined($rtype_count)
&& $rtype_count->{','}
&& $rtype_count->{','} == 1 );
# Back up to the previous closing token
my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
return unless ( defined($Kpp) );
my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
# The containers must be nesting (i.e., sequence numbers must differ by 1 )
if ( $seqno_pp && $is_closing_type{$type_pp} ) {
if ( $seqno_pp == $type_sequence + 1 ) {
# remove the ',' from the top of the new token list
return $self->unstore_last_nonblank_token(',');
}
}
return;
} ## end sub delete_weld_interfering_comma
sub add_interbracket_arrow {
my ($self) = @_;
# Add a new '->' after the last token on the stack
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
# verify that we are adding after a } or ]
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
if ( $type_p ne 'R' && $type_p ne ']' ) {
DEVEL_MODE && Fault("trying to store new arrow after type $type_p");
return;
}
$self->store_new_token( '->', '->', $Kp );
if ( $want_right_space{'->'} == WS_YES ) { $self->store_token() }
return;
} ## end sub add_interbracket_arrow
sub delete_interbracket_arrow {
my ($self) = @_;
# Delete the last nonblank token on the stack which is an '->'
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
# verify that we are deleting an '->'
my $type_p = $rLL_new->[$Kp]->[_TYPE_];
if ( $type_p ne '->' ) {
DEVEL_MODE && Fault("trying to delete arrow but type $type_p");
return;
}
$self->unstore_last_nonblank_token( '->', -1 );
return;
} ## end sub delete_interbracket_arrow
sub unstore_last_nonblank_token {
my ( $self, $type, ($want_space) ) = @_;
# remove the most recent nonblank token from the new token list
# Input parameter:
# $type = type to be removed (for safety check)
# $want_space = telling if a space should remain
# 1 => always
# 0 or undef => only if there was one (used for ',')
# -1 => never (used for '->')
# Returns true if success
# false if error
# This was written and is used for removing commas, but might
# be useful for other tokens. If it is ever used for other tokens
# then the issue of what to do about the other variables, such
# as token counts and the '$last...' vars needs to be considered.
# Safety check, shouldn't happen
if ( @{$rLL_new} < 3 ) {
DEVEL_MODE && Fault("not enough tokens on stack to remove '$type'\n");
return;
}
if ( !defined($want_space) ) { $want_space = 0 }
my ( $rcomma, $rblank );
# Note: originally just for ',' but now also for '->'
# case 1: pop comma from top of stack
if ( $rLL_new->[-1]->[_TYPE_] eq $type ) {
$rcomma = pop @{$rLL_new};
}
# case 2: pop blank and then comma from top of stack
elsif ($rLL_new->[-1]->[_TYPE_] eq 'b'
&& $rLL_new->[-2]->[_TYPE_] eq $type )
{
$rblank = pop @{$rLL_new};
$rcomma = pop @{$rLL_new};
}
# case 3: error, shouldn't happen unless bad call
else {
DEVEL_MODE && Fault("Could not find token type '$type' to remove\n");
return;
}
# A note on updating vars set by sub store_token for this comma: If we
# reduce the comma count by 1 then we also have to change the variable
# $last_nonblank_code_type to be $last_last_nonblank_code_type because
# otherwise sub store_token is going to ALSO reduce the comma count.
# Alternatively, we can leave the count alone and the
# $last_nonblank_code_type alone. Then sub store_token will produce
# the correct result. This is simpler and is done here.
# remove a remaining blank if requested
if ( $rLL_new->[-1]->[_TYPE_] eq 'b' ) {
# current method for deleted '->'
if ( $want_space == -1 ) {
pop @{$rLL_new};
}
}
# add a blank if requested
else {
if ( $want_space == 1 ) {
$self->store_token();
}
elsif ( !$want_space ) {
# add one if there was one (current method for commas)
if ( defined($rblank) ) {
my $len = length($type);
$rblank->[_CUMULATIVE_LENGTH_] -= $len; # fix for deleted comma
push @{$rLL_new}, $rblank;
}
}
else {
# want_space=-1 so do not add a blank
}
}
return 1;
} ## end sub unstore_last_nonblank_token
sub is_list_assignment {
my ( $self, $K_opening ) = @_;
# Given:
# $K_opening = index in $rLL_new of an opening paren
# Return:
# true if this is a list assignment of the form '@xxx = ('
# false otherwise
return unless defined($K_opening);
my $Km = $self->K_previous_nonblank( $K_opening, $rLL_new );
return unless defined($Km);
my $type_m = $rLL_new->[$Km]->[_TYPE_];
# Look for list assignment like '@list = (' or '@{$ref} = ('
# or '%hash = ('
if ( $type_m eq '=' ) {
my $token_m = $rLL_new->[$Km]->[_TOKEN_];
$Km = $self->K_previous_nonblank( $Km, $rLL_new );
return unless defined($Km);
$type_m = $rLL_new->[$Km]->[_TYPE_];
$token_m = $rLL_new->[$Km]->[_TOKEN_];
# backup past a braced item
if ( $token_m eq '}' ) {
my $seqno_m = $rLL_new->[$Km]->[_TYPE_SEQUENCE_];
return unless ($seqno_m);
my $K_opening_m = $self->[_K_opening_container_]->{$seqno_m};
return unless defined($K_opening_m);
$Km = $self->K_previous_nonblank( $K_opening_m, $rLL_new );
return unless defined($Km);
$type_m = $rLL_new->[$Km]->[_TYPE_];
$token_m = $rLL_new->[$Km]->[_TOKEN_];
}
if ( $type_m eq 'i' || $type_m eq 't' ) {
my $sigil = substr( $token_m, 0, 1 );
if ( $sigil eq '@' ) {
return 1;
}
}
}
return;
} ## end sub is_list_assignment
my %is_not_list_paren;
BEGIN {
## trailing comma logic ignores opening parens preceded by these tokens
my @q = qw# if elsif unless while and or err not && | || ? : ! . #;
@is_not_list_paren{@q} = (1) x scalar(@q);
}
sub match_trailing_comma_rule {
my ( $self, $KK, $Kfirst, $Kp, $trailing_comma_style, $paren_flag,
$stable_flag, $if_add )
= @_;
# Decide if a trailing comma rule is matched.
# Input parameter:
# $KK = index of closing token in old ($rLL) token list which follows
# the location of a possible trailing comma. See diagram below.
# $Kfirst = (old) index of first token on the current line of input tokens
# $Kp = index of previous nonblank token in new ($rLL_new) array
# $trailing_comma_rule = packed user control flags
# $if_add = true if adding comma, false if deleting comma
# Returns:
# false if no match
# true if match
# !$if_add to keep the current state unchanged
# For example, we might be checking for addition of a comma here:
# bless {
# _name => $name,
# _price => $price,
# _rebate => $rebate <------ location of possible trailing comma
# }, $pkg;
# ^-------------------closing token at index $KK
# List of $trailing_comma_style values:
# undef stable: do not change
# '1' or '*' : every list should have a trailing comma
# 'm' a multi-line list should have a trailing commas
# 'b' trailing commas should be 'bare' (comma followed by newline)
# 'i' same as s=h but also include any list with no more than about one
# comma per line
# 'h' lists of key=>value pairs with a bare trailing comma
# '0' : no list should have a trailing comma
# ' ' or -wtc not defined : leave trailing commas unchanged [DEFAULT].
# Note the hierarchy:
# '1' includes all 'm' includes all 'b' includes all 'i' includes all 'h'
# Note: an interesting generalization would be to let an upper case
# letter denote the negation of styles 'm', 'b', 'h', 'i'. This might
# be useful for undoing operations. It would be implemented as a wrapper
# around this routine.
# Return !$if_add to keep the current state unchanged
my $no_change = !$if_add;
# If no style defined : do not add or delete
if ( !defined($trailing_comma_style) ) { return $no_change }
#----------------------------------------
# Set some flags describing this location
#----------------------------------------
my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
return $no_change unless ($type_sequence);
my $closing_token = $rLL->[$KK]->[_TOKEN_];
# factors which force stability
my $is_permanently_broken =
$self->[_ris_permanently_broken_]->{$type_sequence};
$is_permanently_broken ||= $rOpts_break_at_old_comma_breakpoints
&& !$rOpts_ignore_old_breakpoints;
$is_permanently_broken ||= $stable_flag;
my $K_opening = $self->[_K_opening_container_]->{$type_sequence};
return $no_change if ( !defined($K_opening) );
my $iline_first_comma =
$self->[_rfirst_comma_line_index_]->{$type_sequence};
my $iline_last_comma = $rLL_new->[$Kp]->[_LINE_INDEX_];
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence};
my $comma_count = 0;
my $fat_comma_count = 0;
my $has_inner_multiline_structure;
my $has_inner_multiline_commas;
# if outer container is paren, return if this is not a possible list
# For example, return for an if paren 'if ('
my $token = $rLL_new->[$K_opening]->[_TOKEN_];
my $is_arrow_call;
my $is_hash_value;
my $is_paren_list;
if ( $token eq '(' ) {
$is_paren_list = 1;
my $Km = $self->K_previous_nonblank( $K_opening, $rLL_new );
if ( defined($Km) ) {
my $type_m = $rLL_new->[$Km]->[_TYPE_];
my $token_m = $rLL_new->[$Km]->[_TOKEN_];
if ( $type_m eq 'k' ) {
if ( $is_not_list_paren{$token_m} ) { return $no_change }
}
$is_arrow_call = $type_m eq '->';
$is_hash_value = $type_m eq '=>';
}
}
if ($rtype_count) {
$comma_count = $rtype_count->{','};
$fat_comma_count = $rtype_count->{'=>'};
}
my $follows_isolated_closing_token;
#----------------------------------------------------------------
# If no existing commas, see if we have an inner nested container
#----------------------------------------------------------------
if (
!$comma_count
&& $if_add # for safety, should be true if no commas
&& $is_closing_type{$last_nonblank_code_type}
)
{
# check for nesting closing containers
my $Kpp = $self->K_previous_nonblank( undef, $rLL_new );
return if ( !defined($Kpp) );
my $seqno_pp = $rLL_new->[$Kpp]->[_TYPE_SEQUENCE_];
my $type_pp = $rLL_new->[$Kpp]->[_TYPE_];
# nesting containers have sequence numbers which differ by 1
my $is_nesting_right =
$seqno_pp
&& $is_closing_type{$type_pp}
&& ( $seqno_pp == $type_sequence + 1 );
# Do not add a comma which will be deleted by
# --delete-weld-interfering commas (b1471)
if ( $is_nesting_right
&& $rOpts_delete_weld_interfering_commas )
{
return;
}
# Does this trailing comma follow an isolated closing token?
if ($is_nesting_right) {
my $ix_pp = $rLL_new->[$Kpp]->[_LINE_INDEX_];
my $Kpp_m = $self->K_previous_nonblank( $Kpp, $rLL_new );
if ($Kpp_m) {
my $ix_pp_m = $rLL_new->[$Kpp_m]->[_LINE_INDEX_];
$follows_isolated_closing_token = $ix_pp > $ix_pp_m;
}
}
#--------------------------------
# If no comma and no fat comma...
#--------------------------------
if ( !$fat_comma_count ) {
# containers must be nesting on the right
return unless ($is_nesting_right);
# give up if it is a code block
if ( $self->[_rblock_type_of_seqno_]->{$seqno_pp} ) {
return;
}
# if outer container is paren, must be sub call or list assignment
# Note that _ris_function_call_paren_ does not currently include
# calls of the form '->(', so that has to be checked separately.
if ( $token eq '('
&& !$self->[_ris_function_call_paren_]->{$type_sequence}
&& !$is_arrow_call
&& !$is_hash_value
&& !$self->is_list_assignment($K_opening) )
{
return;
}
my $K_opening_pp = $self->[_K_opening_container_]->{$seqno_pp};
return unless defined($K_opening_pp);
my $iline_o = $rLL_new->[$K_opening_pp]->[_LINE_INDEX_];
my $iline_c = $rLL_new->[$Kpp]->[_LINE_INDEX_];
my $rtype_count_pp = $self->[_rtype_count_by_seqno_]->{$seqno_pp};
return unless ($rtype_count_pp);
$has_inner_multiline_structure =
$iline_c > $iline_o
&& ( $rtype_count_pp->{','} || $rtype_count_pp->{'=>'} )
&& !$rtype_count_pp->{';'};
return unless ($has_inner_multiline_structure);
# look for inner multiline commas
$iline_first_comma =
$self->[_rfirst_comma_line_index_]->{$seqno_pp};
return if ( !defined($iline_first_comma) );
my $iline_ppc = $rLL_new->[$Kpp]->[_LINE_INDEX_];
return if ( $iline_ppc <= $iline_first_comma );
$has_inner_multiline_commas = 1;
# OK, we have an inner container with commas
}
}
#--------------------------------
# Characterize the trailing comma
#--------------------------------
if ( !defined($iline_first_comma) ) {
# Shouldn't happen: if this sub was called without any commas in this
# container, then either we should have found one in a nested container
# or already returned.
if (DEVEL_MODE) {
my $type_kp = $rLL_new->[$Kp]->[_TYPE_];
Fault(
"at line $iline_last_comma but line of first comma not defined, at Kp=$Kp, type=$type_kp\n"
);
}
return;
}
# multiline commas: first and last commas on different lines
# Note that _ris_broken_container_ also stores the line diff
# but it is not available at this early stage.
my $line_diff_commas = $iline_last_comma - $iline_first_comma;
my $has_multiline_commas =
$line_diff_commas > 0 || $has_inner_multiline_commas;
# Multiline ('m'): the opening and closing tokens on different lines
my $iline_o = $rLL_new->[$K_opening]->[_LINE_INDEX_];
my $iline_c = $rLL->[$KK]->[_LINE_INDEX_];
my $is_multiline = $iline_c > $iline_o;
# Require additional stability factors when adding commas
if ($if_add) {
# basic stability rules
my $is_stable = (
# has commas not in parens, or multiple lines ending in commas
$comma_count
&& ( !$is_paren_list || $has_multiline_commas )
# or contains an inner multiline structure
|| $has_inner_multiline_structure
# or has other stabilizing factors, like comments and blank lines
|| $is_permanently_broken
);
# special stability rules for fat-commas ...
if ( !$is_stable && $fat_comma_count ) {
# stable if not in paren list
$is_stable ||= !$is_paren_list;
# a paren container must span several lines (b1489, b1490)
# and the trailing comma must follow an isolated closing token if
# just 1 '=>' (b1492 b1493 b1494)
$is_stable ||= ( $iline_c - $iline_o > 1 )
&& ( $follows_isolated_closing_token
|| $fat_comma_count > 1 );
}
$is_multiline &&= $is_stable;
}
# Bare 'b': a multiline where the closing container token starts a new line:
my $is_bare_trailing_comma = $is_multiline && $KK == $Kfirst;
#---------------------
# Check for a match...
#---------------------
my $match;
#----------------------------
# 0 : does not match any list
#----------------------------
if ( $trailing_comma_style eq '0' ) {
$match = 0;
}
#------------------------------
# '*' or '1' : matches any list
#------------------------------
elsif ( $trailing_comma_style eq '*' || $trailing_comma_style eq '1' ) {
$match = 1;
}
#-----------------------------
# 'm' matches a Multiline list
#-----------------------------
elsif ( $trailing_comma_style eq 'm' ) {
$match = $is_multiline;
}
#----------------------------------
# 'b' matches a Bare trailing comma
#----------------------------------
elsif ( $trailing_comma_style eq 'b' ) {
$match = $is_bare_trailing_comma;
}
#--------------------------------------------------------------------------
# 'h' matches a bare hash list with about 1 comma and 1 fat comma per line.
# 'i' matches a bare stable list with about 1 comma per line.
#--------------------------------------------------------------------------
elsif ( $trailing_comma_style eq 'h' || $trailing_comma_style eq 'i' ) {
# We can treat these together because they are similar.
# The set of 'i' matches includes the set of 'h' matches.
# the trailing comma must be bare for both 'h' and 'i'
return if ( !$is_bare_trailing_comma );
# There must be no more than one comma per line for both 'h' and 'i'
# The new_comma_count here will include the trailing comma.
my $new_comma_count = $comma_count;
$new_comma_count += 1 if ($if_add);
my $excess_commas = $new_comma_count - $line_diff_commas - 1;
if ( $excess_commas > 0 ) {
# Exception for a special edge case for option 'i': if the trailing
# comma is followed by a blank line or comment, then it cannot be
# covered. Then we can safely accept a small list to avoid
# instability (issue b1443).
if ( $trailing_comma_style eq 'i'
&& $iline_c - $rLL_new->[$Kp]->[_LINE_INDEX_] > 1
&& $new_comma_count <= 2 )
{
$match = 1;
}
# Patch for instability issue b1456: -boc can trick this test; so
# skip it when deleting commas to avoid possible instability
# with option 'h' in combination with -atc -dtc -boc;
elsif (
$trailing_comma_style eq 'h'
# this is a deletion (due to -dtc)
&& !$if_add
# -atc is also set
&& $rOpts_add_trailing_commas
# -boc is set and active
&& $rOpts_break_at_old_comma_breakpoints
&& !$rOpts_ignore_old_breakpoints
)
{
# ignore this test
}
else {
return 0;
}
}
# check fat commas
if (
!$match
&& $fat_comma_count
&& (
# - a list of key=>value pairs with at least 2 fat commas is a
# match for both 'h' and 'i'
$fat_comma_count >= 2
# - an isolated fat comma is a match for type 'h'
# and also 'i' (see note below)
|| (
$fat_comma_count == 1
&& $new_comma_count == 1
## && $if_add ## removed to fix b1476
## removed so that 'i' and 'h' work the same here
## && $trailing_comma_style eq 'h'
)
)
)
{
# but comma count (including trailer) and fat comma count must
# differ by by no more than 1. This allows for some small
# variations.
my $comma_diff = $new_comma_count - $fat_comma_count;
$match = ( $comma_diff >= -1 && $comma_diff <= 1 );
}
# For 'i' only, a list that can be shown to be stable is a match
if ( !$match && $trailing_comma_style eq 'i' ) {
$match = (
$is_permanently_broken
|| ( $rOpts_break_at_old_comma_breakpoints
&& !$rOpts_ignore_old_breakpoints )
);
}
}
#-------------------------------------------------------------------------
# Unrecognized parameter. This should have been caught in the input check.
#-------------------------------------------------------------------------
else {
DEVEL_MODE && Fault("Unrecognized parameter '$trailing_comma_style'\n");
# do not add or delete
return !$if_add;
}
# Now do any special paren check
if ( $match
&& $paren_flag
&& $paren_flag ne '1'
&& $paren_flag ne '*'
&& $closing_token eq ')' )
{
$match &&=
$self->match_paren_control_flag( $type_sequence, $paren_flag,
$rLL_new );
}
# Fix for b1379, b1380, b1381, b1382, b1384 part 1. Mark trailing commas
# for use by -vtc logic to avoid instability when -dtc and -atc are both
# active.
if ($match) {
if ( $if_add && $rOpts_delete_trailing_commas
|| !$if_add && $rOpts_add_trailing_commas )
{
$self->[_ris_bare_trailing_comma_by_seqno_]->{$type_sequence} = 1;
# The combination of -atc and -dtc and -cab=3 can be unstable
# (b1394). So we deactivate -cab=3 in this case.
# A value of '0' or '4' is required for stability of case b1451.
if ( $rOpts_comma_arrow_breakpoints == 3 ) {
$self->[_roverride_cab3_]->{$type_sequence} = 0;
}
}
}
return $match;
} ## end sub match_trailing_comma_rule
sub store_new_token {
my ( $self, $type, $token, $Kp ) = @_;
# Create and insert a completely new token into the output stream
# Caller must add space after this token if necessary
# Input parameters:
# $type = the token type
# $token = the token text
# $Kp = index of the previous token in the new list, $rLL_new
# This operation is a little tricky because we are creating a new token and
# we have to take care to follow the requested whitespace rules.
my $Ktop = @{$rLL_new} - 1;
my $top_is_space = $Ktop >= 0 && $rLL_new->[$Ktop]->[_TYPE_] eq 'b';
if ( $top_is_space && $want_left_space{$type} == WS_NO ) {
#----------------------------------------------------
# Method 1: Convert the top blank into the new token.
#----------------------------------------------------
# Be Careful: we are working on the top of the new stack, on a token
# which has been stored.
$rLL_new->[$Ktop]->[_TOKEN_] = $token;
$rLL_new->[$Ktop]->[_TOKEN_LENGTH_] = length($token);
$rLL_new->[$Ktop]->[_TYPE_] = $type;
# NOTE: we are changing the output stack without updating variables
# $last_nonblank_code_type, etc. Future needs might require that
# those variables be updated here. For now, we just update the
# type counts as necessary.
if ( $is_counted_type{$type} ) {
my $seqno = $seqno_stack{ $depth_next - 1 };
if ($seqno) {
$self->[_rtype_count_by_seqno_]->{$seqno}->{$type}++;
}
}
}
else {
#----------------------------------------
# Method 2: Use the normal storage method
#----------------------------------------
# Patch for issue c078: keep line indexes in order. If the top
# token is a space that we are keeping (due to '-wls=...) then
# we have to check that old line indexes stay in order.
# In very rare
# instances in which side comments have been deleted and converted
# into blanks, we may have filtered down multiple blanks into just
# one. In that case the top blank may have a higher line number
# than the previous nonblank token. Although the line indexes of
# blanks are not really significant, we need to keep them in order
# in order to pass error checks.
if ($top_is_space) {
my $old_top_ix = $rLL_new->[$Ktop]->[_LINE_INDEX_];
my $new_top_ix = $rLL_new->[$Kp]->[_LINE_INDEX_];
if ( $new_top_ix < $old_top_ix ) {
$rLL_new->[$Ktop]->[_LINE_INDEX_] = $new_top_ix;
}
}
else {
if ( $want_left_space{$type} == WS_YES ) {
$self->store_token();
}
}
my $rcopy = copy_token_as_type( $rLL_new->[$Kp], $type, $token );
$self->store_token($rcopy);
}
$last_last_nonblank_code_type = $last_nonblank_code_type;
$last_last_nonblank_code_token = $last_nonblank_code_token;
$last_nonblank_code_type = $type;
$last_nonblank_code_token = $token;
# This sub is currently called to store non-block types ',' and '->', so:
$last_nonblank_block_type = EMPTY_STRING;
return;
} ## end sub store_new_token
sub check_Q {
my ( $self, $KK, $Kfirst, $line_number ) = @_;
# Check that a quote looks okay, and report possible problems
# to the logfile.
# Given:
# $KK = index of the quote token
# $Kfirst = index of first token on the line
# $line_number = number of the line in the input stream
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $token =~ /\t/ ) {
$self->note_embedded_tab($line_number);
}
# The remainder of this routine looks for something like
# '$var = s/xxx/yyy/;'
# in case it should have been '$var =~ s/xxx/yyy/;'
# Start by looking for a token beginning with one of: s y m / tr
return
unless ( $is_s_y_m_slash{ substr( $token, 0, 1 ) }
|| substr( $token, 0, 2 ) eq 'tr' );
# ... and preceded by one of: = == !=
my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
return unless ( defined($Kp) );
my $previous_nonblank_type = $rLL_new->[$Kp]->[_TYPE_];
return unless ( $is_unexpected_equals{$previous_nonblank_type} );
my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
my $previous_nonblank_type_2 = 'b';
my $previous_nonblank_token_2 = EMPTY_STRING;
my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
if ( defined($Kpp) ) {
$previous_nonblank_type_2 = $rLL_new->[$Kpp]->[_TYPE_];
$previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
}
my $next_nonblank_token = EMPTY_STRING;
my $Kn = $KK + 1;
my $Kmax = @{$rLL} - 1;
if ( $Kn <= $Kmax && $rLL->[$Kn]->[_TYPE_] eq 'b' ) { $Kn += 1 }
if ( $Kn <= $Kmax ) {
$next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
}
my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
my $type_0 = $rLL->[$Kfirst]->[_TYPE_];
if (
# preceded by simple scalar
$previous_nonblank_type_2 eq 'i'
&& $previous_nonblank_token_2 =~ /^\$/
# followed by some kind of termination
# (but give complaint if we can not see far enough ahead)
&& $next_nonblank_token =~ /^[; \)\}]$/
# scalar is not declared
&& !( $type_0 eq 'k' && $is_my_our_local{$token_0} )
)
{
my $lno = 1 + $rLL_new->[$Kp]->[_LINE_INDEX_];
my $guess = substr( $previous_nonblank_token, 0, 1 ) . '~';
complain(
"Line $lno: Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
);
}
return;
} ## end sub check_Q
} ## end closure respace_tokens
sub resync_lines_and_tokens {
my $self = shift;
# Re-construct the arrays of tokens associated with the original input
# lines since they have probably changed due to inserting and deleting
# blanks and a few other tokens.
# Return parameters:
# set severe_error = true if processing needs to terminate
my $severe_error;
my $rqw_lines = [];
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $rlines = $self->[_rlines_];
my @Krange_code_without_comments;
my @Klast_valign_code;
# This is the next token and its line index:
my $Knext = 0;
my $Kmax = defined($Klimit) ? $Klimit : -1;
# Verify that old line indexes are in still order. If this error occurs,
# check locations where sub 'respace_tokens' creates new tokens (like
# blank spaces). It must have set a bad old line index.
if ( DEVEL_MODE && defined($Klimit) ) {
my $iline = $rLL->[0]->[_LINE_INDEX_];
foreach my $KK ( 1 .. $Klimit ) {
my $iline_last = $iline;
$iline = $rLL->[$KK]->[_LINE_INDEX_];
if ( $iline < $iline_last ) {
my $KK_m = $KK - 1;
my $token_m = $rLL->[$KK_m]->[_TOKEN_];
my $token = $rLL->[$KK]->[_TOKEN_];
my $type_m = $rLL->[$KK_m]->[_TYPE_];
my $type = $rLL->[$KK]->[_TYPE_];
Fault(<<EOM);
Line indexes out of order at index K=$KK:
at KK-1 =$KK_m: old line=$iline_last, type='$type_m', token='$token_m'
at KK =$KK: old line=$iline, type='$type', token='$token',
EOM
}
}
}
my $iline = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
next if ( $line_of_tokens->{_line_type} ne 'CODE' );
# Get the old number of tokens on this line
my $rK_range_old = $line_of_tokens->{_rK_range};
my ( $Kfirst_old, $Klast_old ) = @{$rK_range_old};
my $Kdiff_old = 0;
if ( defined($Kfirst_old) ) {
$Kdiff_old = $Klast_old - $Kfirst_old;
}
# Find the range of NEW K indexes for the line:
# $Kfirst = index of first token on line
# $Klast = index of last token on line
my ( $Kfirst, $Klast );
my $Knext_beg = $Knext; # this will be $Kfirst if we find tokens
# Optimization: Although the actual K indexes may be completely
# changed after respacing, the number of tokens on any given line
# will often be nearly unchanged. So we will see if we can start
# our search by guessing that the new line has the same number
# of tokens as the old line.
my $Knext_guess = $Knext + $Kdiff_old;
if ( $Knext_guess > $Knext
&& $Knext_guess < $Kmax
&& $rLL->[$Knext_guess]->[_LINE_INDEX_] <= $iline )
{
# the guess is good, so we can start our search here
$Knext = $Knext_guess + 1;
}
# search for the change in input line number
while ($Knext <= $Kmax
&& $rLL->[$Knext]->[_LINE_INDEX_] <= $iline )
{
$Knext++;
}
if ( $Knext > $Knext_beg ) {
$Klast = $Knext - 1;
# Delete any terminal blank token
if ( $rLL->[$Klast]->[_TYPE_] eq 'b' ) { $Klast -= 1 }
if ( $Klast < $Knext_beg ) {
$Klast = undef;
}
else {
$Kfirst = $Knext_beg;
# Save ranges of non-comment code. This will be used by
# sub keep_old_line_breaks.
if ( $rLL->[$Kfirst]->[_TYPE_] ne '#' ) {
push @Krange_code_without_comments, [ $Kfirst, $Klast ];
}
# Only save ending K indexes of code types which are blank
# or 'VER'. These will be used for a convergence check.
# See related code in sub 'convey_batch_to_vertical_aligner'
my $CODE_type = $line_of_tokens->{_code_type};
if ( !$CODE_type
|| $CODE_type eq 'VER' )
{
push @Klast_valign_code, $Klast;
}
}
}
# It is only safe to trim the actual line text if the input
# line had a terminal blank token. Otherwise, we may be
# in a quote.
if ( $line_of_tokens->{_ended_in_blank_token} ) {
$line_of_tokens->{_line_text} =~ s/\s+$//;
}
$line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
# Deleting semicolons can create new empty code lines
# which should be marked as blank
if ( !defined($Kfirst) ) {
my $CODE_type = $line_of_tokens->{_code_type};
if ( !$CODE_type ) {
$line_of_tokens->{_code_type} = 'BL';
}
}
else {
#---------------------------------------------------
# save indexes of all lines with a 'q' at either end
# for later use by sub find_multiline_qw
#---------------------------------------------------
if ( $rLL->[$Kfirst]->[_TYPE_] eq 'q'
|| $rLL->[$Klast]->[_TYPE_] eq 'q' )
{
push @{$rqw_lines}, $iline;
}
}
}
# There shouldn't be any nodes beyond the last one. This routine is
# relinking lines and tokens after the tokens have been respaced. A fault
# here indicates some kind of bug has been introduced into the above loops.
# There is not good way to keep going; we better stop here.
if ( $Knext <= $Kmax ) {
Fault_Warn(
"unexpected tokens at end of file when reconstructing lines");
$severe_error = 1;
return ( $severe_error, $rqw_lines );
}
$self->[_rKrange_code_without_comments_] = \@Krange_code_without_comments;
# Setup the convergence test in the FileWriter based on line-ending indexes
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->setup_convergence_test( \@Klast_valign_code );
return ( $severe_error, $rqw_lines );
} ## end sub resync_lines_and_tokens
sub package_info_maker {
my ( $self, $rK_package_list ) = @_;
# Create a hash of values which can be used to find the package of any
# token. This sub must be called after rLL has been updated because it
# calls parent_seqno_by_K.
# Given:
# @{$rK_package_list} = a simple list of token index K of each 'package'
# statement in the file.
# Returns:
# {
# 'rpackage_info_list' => \@package_info_list,
# 'rpackage_lookup_list' => \@package_lookup_list,
# }
# which are two lists with useful information on all packages
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
my $Klimit = @{$rLL} - 1;
# RETURN LIST #1: package_info_list:
# The package of a token at an arbitrary index K is the last entry
# in the list for which K_opening < K < K_closing.
# If no package is found, then the package is 'main'.
# This list is in order of the index K of the package statements.
# so the search can stop if we find K_opening > K.
my @package_info_list;
# Start with an entry for 'main'
push @package_info_list,
{
type => 'package',
name => 'main',
level => 0,
line_start => 0,
K_opening => 0,
K_closing => $Klimit,
is_block => 0,
max_change => 0,
block_count => 0,
};
my @package_stack;
push @package_stack, 0;
# RETURN LIST #2: package_lookup_list:
# A flat list of [$name, $Kbegin, $Kend], where package is name '$name'
# from token index $Kbegin to the index $Kend. This is easier to use than
# LIST #1 since it eliminates the need for a stack.
my @package_lookup_list;
push @package_lookup_list, [ 'main', 0, 0 ];
foreach my $KK ( @{$rK_package_list} ) {
my $item = $rLL->[$KK];
my $type = $item->[_TYPE_];
# Stored K values may be off by 1 due to an added blank
if ( $type eq 'b' ) {
$KK += 1;
$item = $rLL->[$KK];
$type = $item->[_TYPE_];
}
# shouldn't happen:
if ( $type ne 'P' ) {
DEVEL_MODE && Fault("type '$type' expected to be 'P'\n");
next;
}
my $token = $item->[_TOKEN_];
my ( $keyword, $name ) = split /\s+/, $token, 2;
my $K_opening = $KK;
my $lx_start = $item->[_LINE_INDEX_];
# for non-BLOCK form:
my $level = $item->[_LEVEL_];
my $parent_seqno = $self->parent_seqno_by_K($KK);
my $is_block = 0;
# Check for BLOCK form:
# package NAME VERSION BLOCK
# Skip past VERSION
my $Kn = $self->K_next_code($KK);
if ( $Kn && $rLL->[$Kn]->[_TYPE_] eq 'n' ) {
$Kn = $self->K_next_code($Kn);
}
# Look for BLOCK
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '{' ) {
my $seqno_n = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
$level += 1;
$parent_seqno = $seqno_n;
$is_block = $seqno_n;
}
my $K_closing = $Klimit;
if ( $parent_seqno != SEQ_ROOT ) {
my $Kc = $K_closing_container->{$parent_seqno};
if ( defined($Kc) ) {
$K_closing = $Kc;
}
}
# This is the index of this new package in the package_info_list
my $ii_next = @package_info_list;
while (@package_stack) {
my $ii = $package_stack[-1];
my $Kc = $package_info_list[$ii]->{K_closing};
# pop an inactive stack item and keep going
if ( $Kc < $K_opening ) {
pop @package_stack;
my $i_top = $package_stack[-1];
my $name_top = $package_info_list[$i_top]->{name};
push @package_lookup_list, [ $name_top, $Kc + 1 ];
next;
}
# end a stack item at this level
else {
my $level_i = $package_info_list[$ii]->{level};
if ( $level_i == $level ) {
$package_info_list[$ii]->{K_closing} = $K_opening - 1;
pop @package_stack;
}
}
last;
} ## end while (@package_stack)
push @package_lookup_list, [ $name, $K_opening ];
push @package_stack, $ii_next;
# max_change and block_count are for possible future usage
push @package_info_list,
{
type => $keyword,
name => $name,
level => $level,
line_start => $lx_start + 1,
K_opening => $K_opening,
K_closing => $K_closing,
is_block => $is_block,
max_change => 0,
block_count => 0,
};
}
my $imax = @package_lookup_list - 1;
my $Kend = $Klimit;
foreach my $i ( reverse( 0 .. $imax ) ) {
$package_lookup_list[$i]->[2] = $Kend;
$Kend = $package_lookup_list[$i]->[1] - 1;
}
# Eliminate any needless starting package 'main'
if ( @package_lookup_list > 1 && $package_lookup_list[0]->[2] < 0 ) {
shift @package_lookup_list;
}
return {
'rpackage_info_list' => \@package_info_list,
'rpackage_lookup_list' => \@package_lookup_list,
};
} ## end sub package_info_maker
use constant DEBUG_COUNT => 0;
my %is_non_interfering_keyword;
my %is_keyword_returning_scalar;
BEGIN {
# Builtin keywords which do not interfere with counting args.
# They do not produce arrays and do not consume more than one arg, so
# following parens are not required.
my @q = qw(
abs and chr cmp continue cos
defined delete do else elsif eq
exp fc ge gt hex int
lc lcfirst le length local log
lt my ne not oct or
ord ord our pop pos rand
ref scalar shift sin sqrt srand
state uc ucfirst undef xor
);
@is_non_interfering_keyword{@q} = (1) x scalar(@q);
# Builtin keywords possibly taking multiple parameters but returning a
# scalar value. These can be handled if the args are in parens.
@q = qw( substr join atan2 );
@is_keyword_returning_scalar{@q} = (1) x scalar(@q);
}
sub count_list_elements {
my ( $self, $rarg_list ) = @_;
# Given call arg hash containing:
# $seqno_list = sequence number of a paren of list to be counted, or
# $K_list_start = starting index of list (for 'return' lists)
# $shift_count_min = starting min arg count items to include
# $shift_count_max = starting max arg count items to include
# $is_signature = true if this is a sub signature list
# $self_name = name of first arg found
# Return:
# -shift_count_min => starting min arg count items to include, or
# undef if a specific number was not determined
# -shift_count_max => starting max arg count items to include
# undef if a specific number was not determined
# -self_name => possibly updated name of first arg
# -initialized => a hash entry maintained by this routine
# for keeping track of repeated calls for 'return' lists
# Method:
# - The basic method is to count commas, but
# - if we encounter sigils @ or % or other problems which prevent a
# count, then we do a simple return; the count will then be indefinite.
# Set the counts to undef in case we have to do a simple return upon
# encountering an indeterminate list count
my $shift_count_min_input = $rarg_list->{shift_count_min};
## my $shift_count_max_input = $rarg_list->{shift_count_max};
$rarg_list->{shift_count_min} = undef;
$rarg_list->{shift_count_max} = undef;
my $seqno_list = $rarg_list->{seqno_list};
my $K_list_start = $rarg_list->{K_list_start};
my $is_signature = $rarg_list->{is_signature};
my $self_name = $is_signature ? EMPTY_STRING : $rarg_list->{self_name};
my $rLL = $self->[_rLL_];
my $K_list_end;
# Input option 1: $seqno_list is a container
my $is_return_list;
if ( defined($seqno_list) ) {
$K_list_start = $self->[_K_opening_container_]->{$seqno_list};
$K_list_end = $self->[_K_closing_container_]->{$seqno_list};
return unless ( defined($K_list_end) );
}
# Input option 2: $K_list_start is the index of a token,
# such as 'return', which has trailing args to count.
elsif ( defined($K_list_start) ) {
# Skip past a leading blank if necessary
if ( $rLL->[$K_list_start]->[_TYPE_] eq 'b' ) { $K_list_start++ }
$is_return_list = $rLL->[$K_list_start]->[_TYPE_] eq 'k'
&& $rLL->[$K_list_start]->[_TOKEN_] eq 'return';
$K_list_end = @{$rLL} - 1;
# Optimization for common case of simple return
my $Kn = $self->K_next_code($K_list_start);
return unless ($Kn);
my $type_n = $rLL->[$Kn]->[_TYPE_];
if ( $type_n eq ';'
|| $is_closing_type{$type_n}
|| ( $type_n eq 'k' && $is_if_unless{ $rLL->[$Kn]->[_TOKEN_] } ) )
{
$rarg_list->{shift_count_max} = 0;
return;
}
# Check for 'return ()'
if ( $rLL->[$Kn]->[_TOKEN_] eq '(' ) {
my $Knn = $self->K_next_code($Kn);
if ( $Knn && $rLL->[$Knn]->[_TOKEN_] eq ')' ) {
$rarg_list->{shift_count_max} = 0;
return;
}
}
}
else {
DEVEL_MODE && Fault("Neither seqno_list nor K_list_start defined\n");
return;
}
# Initialize the arg count for this call. We start with any 'shift' counts
# previously seen if this is not a signature or 'return' list
my $arg_count = 0;
if ( $seqno_list && $shift_count_min_input && !$is_signature ) {
$arg_count = $shift_count_min_input;
}
# For signature lists we need to remember a minimum
my $arg_count_min;
my @seqno_stack;
if ($seqno_list) { push @seqno_stack, $seqno_list }
my $KK = $K_list_start;
my $KK_last_last_nb;
my $KK_last_nb;
my $KK_this_nb = $K_list_start;
my $backup_on_last = sub {
# exclude the latest token upon encountering end of list
# to avoid adding 1 extra comma at the end
$KK_this_nb = $KK_last_nb;
$KK_last_nb = $KK_last_last_nb;
$KK_last_last_nb = undef;
return;
}; ## end $backup_on_last = sub
#--------------------------------------------------------
# Main loop to scan the container looking for list items.
#--------------------------------------------------------
while ( ++$KK < $K_list_end ) {
# safety check - shouldn't happen
if ( !$KK || $KK <= $KK_this_nb ) {
if (DEVEL_MODE) {
my $lno = $rLL->[$KK_this_nb]->[_LINE_INDEX_] + 1;
Fault("near line $lno: index $KK decreased, was $KK_this_nb\n");
}
return;
}
my $type = $rLL->[$KK]->[_TYPE_];
next if ( $type eq 'b' );
next if ( $type eq '#' );
last if ( $type eq ';' );
return if ( $type eq '..' );
# i.e., ($str=~/(\d+)(\w+)/) may be a list of n items
return if ( $type eq '=~' );
$KK_last_last_nb = $KK_last_nb;
$KK_last_nb = $KK_this_nb;
$KK_this_nb = $KK;
my $token = $rLL->[$KK]->[_TOKEN_];
# Handle a sequenced item
if ( my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_] ) {
if ( $is_opening_type{$type} ) {
if ( $token eq '(' ) {
# Skip past args to args to subs not returning
# lists, like 'pop(' 'length('
if ($KK_last_nb) {
my $token_last = $rLL->[$KK_last_nb]->[_TOKEN_];
my $type_last = $rLL->[$KK_last_nb]->[_TYPE_];
if ( $type_last eq 'k'
&& $is_non_interfering_keyword{$token_last} )
{
$KK = $self->[_K_closing_container_]->{$seqno};
next;
}
}
# If not a list..
if ( !$self->is_list_by_seqno($seqno) ) {
# always enter a container following 'return', as in:
# return (find_sub($subname) =~ /^(.*):(\d+)-(\d+)$/);
if ( $is_return_list && $KK_last_nb == $K_list_start ) {
push @seqno_stack, $seqno;
next;
}
my $Kc = $self->[_K_closing_container_]->{$seqno};
if ( !$Kc ) { $backup_on_last->(); last }
# Enter nested parens with inner list
# ( ( $v1, $v2) )
# | | | |
# $KK $Kn $Kc_p $Kc
if ( $self->[_rhas_list_]->{$seqno} ) {
my $Kc_p = $self->K_previous_code($Kc);
if ( $Kc_p && $rLL->[$Kc_p]->[_TOKEN_] eq ')' ) {
my $seqno_c_p =
$rLL->[$Kc_p]->[_TYPE_SEQUENCE_];
if ( $seqno_c_p && $seqno_c_p == $seqno + 1 ) {
my $Kn = $self->K_next_code($KK);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '(' )
{
push @seqno_stack, $seqno;
next;
}
}
}
}
# enter a list slice, such as '(caller)[1,2]'
my $Kn = $self->K_next_code($Kc);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '[' ) {
my $seqno_next = $rLL->[$Kn]->[_TYPE_SEQUENCE_];
if ( $seqno_next
&& $self->is_list_by_seqno($seqno_next) )
{
$KK = $Kn;
push @seqno_stack, $seqno_next;
next;
}
}
my $KK_n = $self->K_next_code($KK);
if ($KK_n) {
# look for something like return (@list), which
# will not be marked as a list due to lack of a
# comma
my $type_KK_n = $rLL->[$KK_n]->[_TYPE_];
my $token_KK_n = $rLL->[$KK_n]->[_TOKEN_];
if ( $type_KK_n eq 't' || $type_KK_n eq 'i' ) {
my $sigil = substr( $token_KK_n, 0, 1 );
if ( $sigil eq '@' || $sigil eq '%' ) { return }
}
elsif ( $type_KK_n eq 'k' ) {
# look for something like
# return (map { ...
if ( !$is_non_interfering_keyword{$token_KK_n} )
{
return;
}
}
else { }
}
}
# a list..
else {
# Descend into a paren list in some special cases:
if ($KK_last_nb) {
my $token_last = $rLL->[$KK_last_nb]->[_TOKEN_];
my $type_last = $rLL->[$KK_last_nb]->[_TYPE_];
# 'return (' or 'my ('
my $ok = $type_last eq 'k'
&& ( $token_last eq 'return'
|| $token_last eq 'my' );
# ',('
$ok ||= $type_last eq ',';
# '(('
$ok ||= $token_last eq '(';
# 'wantarray ? ('
$ok ||=
$KK_last_last_nb
&& $is_return_list
&& $rLL->[$KK_last_nb]->[_TYPE_] eq '?'
&& $rLL->[$KK_last_last_nb]->[_TOKEN_] eq
'wantarray';
if ($ok) {
push @seqno_stack, $seqno;
next;
}
}
}
}
# Otherwise skip past this container
$KK = $self->[_K_closing_container_]->{$seqno};
next;
}
elsif ( $is_closing_type{$type} ) {
my $seqno_test = pop @seqno_stack;
if ( $seqno_test && $seqno_test eq $seqno ) {
# hide all closing tokens to avoid adding an extra
# comma at the end at something like '$x,)'
$backup_on_last->();
next;
}
$backup_on_last->();
last;
}
elsif ( $type eq '?' ) {
# continue scanning ternary for 'return wantarray ?'
if ( $rLL->[$KK_last_nb]->[_TOKEN_] eq 'wantarray'
&& $rLL->[$KK_last_nb]->[_TYPE_] eq 'k'
&& $KK_last_last_nb
&& $rLL->[$KK_last_last_nb]->[_TOKEN_] eq 'return'
&& $rLL->[$KK_last_last_nb]->[_TYPE_] eq 'k' )
{
push @seqno_stack, $seqno;
next;
}
# give up in a return list
if ($is_return_list) {
return;
}
# otherwise skip past this ternary
$KK = $self->[_K_closing_ternary_]->{$seqno};
next;
}
elsif ( $type eq ':' ) {
my $seqno_test = pop @seqno_stack;
if ( $seqno_test && $seqno_test eq $seqno ) {
# for wantarray ternary, assume one item after ':'
# TODO: if wantarray was preceded by '!' then we should
# swap the two counts here
$arg_count_min = 1;
$backup_on_last->();
last;
}
$backup_on_last->();
last;
}
else {
DEVEL_MODE
&& Fault("unexpected seqno=$seqno for type='$type'\n");
}
}
# handle identifiers
elsif ( $type eq 'i' || $type eq 't' ) {
my $sigil = substr( $token, 0, 1 );
# give up if we find list sigils not preceded by 'scalar'
if ( $sigil eq '%' || $sigil eq '@' ) {
my $K_last = $self->K_previous_code($KK);
if ( defined($K_last) ) {
my $type_last = $rLL->[$K_last]->[_TYPE_];
next if ( $type_last eq '+' || $type_last eq 'p' );
next if ( $type_last eq q{\\} );
next if ( $type_last eq '!' );
my $token_last = $rLL->[$K_last]->[_TOKEN_];
next if ( $type_last eq 'k' && $token_last eq 'scalar' );
}
return;
}
# remember the name of the first item, maybe something like '$self'
elsif ( $sigil eq '$'
&& !$self_name
&& !$arg_count )
{
$self_name = $token;
$rarg_list->{self_name} = $self_name;
}
else {
# continue search
}
}
# handle commas: count commas separating args in a list
elsif ( $type eq ',' ) {
$arg_count++;
}
# treat fat commas as commas
elsif ( $type eq '=>' ) {
$arg_count++;
}
# an '=' in a signature indicates an optional arg
elsif ( $type eq '=' ) {
if ( $is_signature && !defined($arg_count_min) ) {
$arg_count_min = $arg_count;
}
}
# check for a paren-less call
elsif ( $is_kwU{$type} ) {
# Something like 'length $str' is ok
if ( $type eq 'k' ) {
# Something like 'length $str' is ok
next if ( $is_non_interfering_keyword{$token} );
next if ( $token eq 'wantarray' );
# hop over asubs
next if ( $token eq 'sub' );
# something like return 1 if ...
if ( $is_if_unless{$token} ) {
$backup_on_last->();
last;
}
}
# Certain subsequent tokens prevent problems
my $Kn = $self->K_next_code($KK);
next unless defined($Kn);
my $token_Kn = $rLL->[$Kn]->[_TOKEN_];
my $type_Kn = $rLL->[$Kn]->[_TYPE_];
next
if ( $token_Kn eq ')'
|| $type_Kn eq '=>'
|| $type_Kn eq '->'
|| $type_Kn eq ',' );
# Certain keywords returning scalars are okay if not made
# as paren-less calls
next
if ( $type eq 'k'
&& $token_Kn eq '('
&& $is_keyword_returning_scalar{$token} );
# Otherwise, the safe thing is to give up because a function call:
# -might be paren-less with multiple args, or
# -it might return a list (i.e. splice, split, localtime, ...)
# which will interfere with counting args
if (DEBUG_COUNT) {
my $lno = $rLL->[$KK]->[_LINE_INDEX_] + 1;
my $input_stream_name = get_input_stream_name();
print {*STDERR}
"DEBUG_COUNT: file $input_stream_name line=$lno type=$type tok=$token token_Kn=$token_Kn\n";
}
return;
}
else {
# continue search
}
} ## end while ( ++$KK < $K_list_end)
# Increase the count by 1 if the list does not have a trailing comma
if ( defined($KK_this_nb)
&& $KK_this_nb > $K_list_start
&& $rLL->[$KK_this_nb]->[_TYPE_] ne ',' )
{
$arg_count++;
}
if ( !defined($arg_count_min) ) {
$arg_count_min = $arg_count;
}
$rarg_list->{shift_count_min} = $arg_count_min;
$rarg_list->{shift_count_max} = $arg_count;
return;
} ## end sub count_list_elements
# A constant to limit backward searches
use constant MANY_TOKENS => 100;
my %is_shift_pop;
my %is_scalar_sigil;
my %is_array_sigil;
BEGIN {
my @q = qw( shift pop );
@is_shift_pop{@q} = (1) x scalar(@q);
@q = qw( $ * & );
@is_scalar_sigil{@q} = (1) x scalar(@q);
@q = qw( @ % );
@is_array_sigil{@q} = (1) x scalar(@q);
}
sub count_prototype_args {
my ($string) = @_;
# Given
# $string = a string with a prototype in parens, such as '($$;$)'
# Returns ($count_min, $count_max)
# $count_min = min specific number of args expected, or
# undef if number of args can vary
# $count_max = max specific number of args expected, or
# undef if number of args can vary
my @chars = split //, $string;
my $count_min = 0;
my $count_max = 0;
my $saw_semicolon;
my $bump_count = sub {
$count_max++;
$count_min++ if ( !$saw_semicolon );
return;
};
my $saw_array = sub {
$count_max = undef;
$count_min = undef if ( !$saw_semicolon );
return;
};
while (@chars) {
my $ch = shift @chars;
if ( !defined($ch) ) { $saw_array->(); last }
elsif ( $ch eq '(' ) { last if ($count_min) }
elsif ( $ch eq ')' ) { last }
elsif ( $ch eq ';' && !$saw_semicolon ) { $saw_semicolon = 1 }
elsif ( $ch eq '_' && !$saw_semicolon ) {
$saw_semicolon = 1;
$bump_count->() if ( !$count_min );
}
elsif ( $is_array_sigil{$ch} ) { $saw_array->(); last }
elsif ( $is_scalar_sigil{$ch} ) { $bump_count->(); }
elsif ( $ch eq q{\\} ) {
$ch = shift @chars;
last unless defined($ch);
$bump_count->();
}
else { next }
} ## end while (@chars)
return ( $count_min, $count_max );
} ## end sub count_prototype_args
sub find_sub_token {
my ( $self, $seqno_block ) = @_;
# Given:
# $seqno_block = sequence number of a sub block brace
# Return:
# $Ksub = index of the actual 'sub' token for the sub
# this will include the name of a named sub, and any prototype
# undef if cannot find it; this is not a critical sub, so no heroics
#
# Notation:
#
# sub find_sub_token {
# | |
# $Ksub --$K_opening_container for $seqno_block
my $rLL = $self->[_rLL_];
# See if sub respace_tokens saved the index of the previous type 'S'
# for us. May need to back up 1 token if spaces were deleted.
my $K_sub = $self->[_rK_sub_by_seqno_]->{$seqno_block};
if ( defined($K_sub) ) {
my $type = $rLL->[$K_sub]->[_TYPE_];
if ( $type ne 'S' ) {
$K_sub -= 1;
$type = $rLL->[$K_sub]->[_TYPE_];
if ( $type ne 'S' ) {
if (DEVEL_MODE) {
my $token = $rLL->[$K_sub]->[_TOKEN_];
my $lno = $rLL->[$K_sub]->[_LINE_INDEX_] + 1;
my $block_type =
$self->[_rblock_type_of_seqno_]->{$seqno_block};
Fault(<<EOM);
line $lno: Bad Ksub=$K_sub for block $seqno_block,
expecting type 'S' and token=$block_type
found type '$type' and token='$token'
EOM
}
# This shouldn't happen, but try to keep going
# with the help of the search loop below.
$K_sub = undef;
}
}
}
# Must search for it...
# Scan backward from the opening brace to find the keyword 'sub'
if ( !defined($K_sub) ) {
# We normally only arrive here for anonymous subs. But also
# if --indent-only is set because respace_tokens is skipped.
my $K_opening_block = $self->[_K_opening_container_]->{$seqno_block};
my $Kt_min = $K_opening_block - MANY_TOKENS;
if ( $Kt_min < 0 ) { $Kt_min = 0 }
foreach my $Kt ( reverse( $Kt_min .. $K_opening_block ) ) {
my $token = $rLL->[$Kt]->[_TOKEN_];
my $type = $rLL->[$Kt]->[_TYPE_];
if ( $type eq 'S' ) {
# type 'S' could be 'method xxx' or '$fn=sub () {' - see c372
$K_sub = $Kt;
last;
}
if ( ( $type eq 'k' || $type eq 'i' )
&& substr( $token, 0, 3 ) eq 'sub' )
{
# anonymous subs are type 'k'
$K_sub = $Kt;
last;
}
}
}
return $K_sub;
} ## end sub find_sub_token
sub count_default_sub_args {
my ( $self, $item, $seqno ) = @_;
# Given:
# $item = hash ref with sub arg info
# $seqno => sequence number of a sub block of a paren
# containing possible default args
# Task:
# count default args and update minimum arg count in $item
my $rLL = $self->[_rLL_];
return unless ($seqno);
# The token before the opening must be a ',' or '('
my $K_o = $self->[_K_opening_container_]->{$seqno};
my $K_test = $self->K_previous_code($K_o);
return unless defined($K_test);
my $token_test = $rLL->[$K_test]->[_TOKEN_];
return if ( $token_test ne ',' && $token_test ne '(' );
# Check that an opening token has the previous sequence number
if ( $token_test eq '(' ) {
my $seqno_o = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
if ( !$seqno_o || $seqno_o != $seqno - 1 ) {
# shouldn't happen: may be bad call value since the token
# with '$seqno' was just before a closing paren
DEVEL_MODE && Fault("seqno_o=$seqno_o != $seqno-1\n");
return;
}
}
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno};
my $default_arg_count;
if ($rtype_count) {
# One or more commas, like: ( ... $v1, $v2, ($d1, $d2) )=@_
# Note that the comma_count does not include any trailing comma
# so we always add 1
$default_arg_count = $rtype_count->{','} + 1;
}
if ( !defined($default_arg_count) ) {
# Check for empty parens, like: ( ... $v1, $v2, () )=@_
my $K_n = $self->K_next_code($K_o);
my $K_c = $self->[_K_closing_container_]->{$seqno};
return if ( $K_n == $K_c );
# No commas but not empty, so 1 arg in parens
# Something like: ( ... $v1, $v2, ($d1) )=@_
$default_arg_count = 1;
}
return unless ($default_arg_count);
# Update the minimum count to exclude the defaults
if ( $item->{shift_count_min} >= $default_arg_count ) {
$item->{shift_count_min} -= $default_arg_count;
}
else {
DEVEL_MODE
&& Fault(
"default count is $default_arg_count but total is $item->{shift_count_min}"
);
}
return;
} ## end sub count_default_sub_args
sub count_sub_input_args {
my ( $self, $item ) = @_;
# Given: $item = hash ref with
# seqno => $seqno_block = sequence number of a sub block
# max_arg_count => optional optimization flag, see note below
# Updates hash ref $item with values for keys:
# shift_count_min => minimum absolute number of input args
# shift_count_max => maximum absolute number of input args
# self_name => name of first arg (if it can be determined)
# is_signature => true if args are in a signature
# .. plus several other quantities of interest to the caller
# These keys are left undefined if they cannot be determined.
# 'shift_count_min' and 'shift_count_max' are the same except for
# a signature or prototype.
my $seqno_block = $item->{seqno};
return unless ($seqno_block);
# Pull out optional optimization flag. If this is true then there
# may be calls to this sub with args, so we should to do a full
# search of the entire sub if this would cause a -wma warning.
my $max_arg_count = $item->{max_arg_count};
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $K_opening_block = $self->[_K_opening_container_]->{$seqno_block};
# Find index '$K' of the last '@_' in this sub, if any
# Note on '$K_last_at_underscore': if we exit with only seeing shifts,
# but a pre-scan saw @_ somewhere after the last K, then the count
# is dubious and we do a simple return
my $K_last_at_underscore = 0;
my $rKlist = $self->[_rK_AT_underscore_by_sub_seqno_]->{$seqno_block};
if ( defined($rKlist) ) {
$K_last_at_underscore = $rKlist->[-1];
}
# Note on $_[n]: if there are any shifts of @_ or references to @_, we
# cannot use these for a count. Otherwise, we can use the range of n in
# $_[n] to get an expected arg count if all indexes n are simple integers.
# So for example if we see anything like $_[2+$i] we have to give up.
my $seqno_at_index_min;
my $at_index_min;
my $at_index_max;
my $dollar_underscore_zero_name = sub {
# Find the first arg name for a sub which references $_[0] and does
# not do shifting. There are two possibilities:
# return '$word' in something like '$word = $_[0];'
# return nothing otherwise
return unless ( $seqno_at_index_min && $at_index_min == 0 );
my $Ko = $K_opening_container->{$seqno_at_index_min};
my $Kc = $K_closing_container->{$seqno_at_index_min};
return unless ( $Ko && $Kc );
my $K_semicolon = $self->K_next_code($Kc);
return unless ( $K_semicolon && $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
my $K_m = $self->K_previous_code($Ko);
return unless ( $K_m && $rLL->[$K_m]->[_TOKEN_] eq '$_' );
my $K_mm = $self->K_previous_code($K_m);
return unless ( $K_mm && $rLL->[$K_mm]->[_TYPE_] eq '=' );
my $K_mmm = $self->K_previous_code($K_mm);
return unless ( $K_mmm && $rLL->[$K_mmm]->[_TYPE_] eq 'i' );
my $name = $rLL->[$K_mmm]->[_TOKEN_];
return unless ( $name =~ /^\$\w/ );
return $name;
}; ## end $dollar_underscore_zero_name = sub
my $rseqno_DOLLAR_underscore =
$self->[_rDOLLAR_underscore_by_sub_seqno_]->{$seqno_block};
if ( !defined($rKlist) && $rseqno_DOLLAR_underscore ) {
my $ok;
foreach my $seqno_DOLLAR ( @{$rseqno_DOLLAR_underscore} ) {
$ok = 0;
my $Ko = $K_opening_container->{$seqno_DOLLAR};
my $Kn = $self->K_next_code($Ko);
last unless ($Kn);
last unless ( $rLL->[$Kn]->[_TYPE_] eq 'n' );
my $token = ( $rLL->[$Kn]->[_TOKEN_] );
last unless ( $token =~ /^\d+$/ );
my $Knn = $self->K_next_code($Kn);
my $Kc = $K_closing_container->{$seqno_DOLLAR};
last unless ( $Knn && $Kc && $Knn == $Kc );
if ( !defined($at_index_min) || $token < $at_index_min ) {
$at_index_min = $token;
if ( !defined($seqno_at_index_min) ) {
$seqno_at_index_min = $seqno_DOLLAR;
}
}
if ( !defined($at_index_max) || $token > $at_index_max ) {
$at_index_max = $token;
}
$ok = 1;
}
if ( !$ok ) {
$at_index_min = undef;
$at_index_max = undef;
}
}
# flag indicating we saw a "pop @_" or just "pop;";
my $saw_pop_at_underscore;
my $ix_HERE_END = -1;
my $K_sub = $self->find_sub_token($seqno_block);
# shouldn't happen:
if ( !defined($K_sub) || $K_sub >= $K_opening_block ) {
if ( !defined($K_sub) ) { $K_sub = 'undef' }
DEVEL_MODE && Fault("Bad K_sub=$K_sub, opening=$K_opening_block\n");
return;
}
#----------------------------------
# Check for and process a prototype
#----------------------------------
my $sub_token = $rLL->[$K_sub]->[_TOKEN_];
my $iproto_beg = index( $sub_token, '(' );
if ( $iproto_beg > 0 ) {
my $iproto_end = index( $sub_token, ')', $iproto_beg );
if ( $iproto_end > $iproto_beg ) {
my $prototype =
substr( $sub_token, $iproto_beg, $iproto_end - $iproto_beg + 1 );
my ( $prototype_count_min, $prototype_count_max ) =
count_prototype_args($prototype);
$item->{prototype} = $prototype;
$item->{prototype_count_min} = $prototype_count_min;
$item->{prototype_count_max} = $prototype_count_max;
# Since we don't yet know if we must add 1 for a method call, we
# will just continue normally and let the caller figure it out.
}
}
#---------------------------------------
# Check for and process a signature list
#---------------------------------------
my $Ksub_p = $self->K_next_code($K_sub);
if ( $rLL->[$Ksub_p]->[_TYPE_SEQUENCE_]
&& $rLL->[$Ksub_p]->[_TOKEN_] eq '(' )
{
# Switch to searching the signature container. We will get the
# count when we arrive at the closing token.
my $seqno_list = $rLL->[$Ksub_p]->[_TYPE_SEQUENCE_];
$item->{seqno_list} = $seqno_list;
$item->{is_signature} = 1;
$self->count_list_elements($item);
# We are finished for a signature list
return;
}
#-------------------------------------------------------------
# Main loop: look for =shift; and =@_; within sub block braces
#-------------------------------------------------------------
my $seqno = $seqno_block;
my $K_opening = $self->[_K_opening_container_]->{$seqno};
my $K_closing = $self->[_K_closing_container_]->{$seqno};
return unless defined($K_closing);
my $level_opening = $rLL->[$K_opening]->[_LEVEL_];
# Count number of 'shift;' at the top level
my $shift_count = 0;
my $self_name = EMPTY_STRING;
my $semicolon_count_after_last_shift = 0;
my $in_interpolated_quote;
my $KK = $K_opening;
my $KK_this_nb = $KK;
while ( ++$KK < $K_closing ) {
# safety check - shouldn't happen
if ( !$KK || $KK <= $KK_this_nb ) {
if (DEVEL_MODE) {
my $lno = $rLL->[$KK_this_nb]->[_LINE_INDEX_] + 1;
Fault("near line $lno: index $KK decreased, was $KK_this_nb\n");
}
return;
}
my $type = $rLL->[$KK]->[_TYPE_];
next if ( $type eq 'b' );
next if ( $type eq '#' );
$KK_this_nb = $KK;
my $token = $rLL->[$KK]->[_TOKEN_];
# Note that '$_' here is marked as type 'Z': print $_[0];
if ( $type eq 'i' || $type eq 'Z' ) {
# look for '@_'
if ( $token eq '@_' ) {
# Found '@_': the search will end here
my $level = $rLL->[$KK]->[_LEVEL_];
# Give up upon finding @_ at a lower level
return unless ( $level == $level_opening + 1 );
# Look ahead for ';'
my $K_p = $self->K_next_code($KK);
return unless ($K_p);
return unless ( $rLL->[$K_p]->[_TYPE_] eq ';' );
# Look back for ' = @_'
my $K_m = $self->K_previous_code($KK);
return unless defined($K_m);
my $type_m = $rLL->[$K_m]->[_TYPE_];
return unless ( $type_m eq '=' );
# Look back for ' ) = @_'
my $K_mm = $self->K_previous_code($K_m);
return unless defined($K_mm);
my $token_mm = $rLL->[$K_mm]->[_TOKEN_];
my $seqno_mm = $rLL->[$K_mm]->[_TYPE_SEQUENCE_];
# Count args in the list ( ... ) = @_;
if ( $seqno_mm && $token_mm eq ')' ) {
$item->{seqno_list} = $seqno_mm;
$item->{is_signature} = 0;
$item->{shift_count_min} = $shift_count;
$item->{shift_count_max} = $shift_count;
$self->count_list_elements($item);
# Count default args placed in separate parens, such as:
# .. $v1 ,($def1, $def2)) = @_
# .. $v1 ,($def1, $def2),) = @_
# look at the token before the last ')'
my $K_mm_p = $self->K_previous_code($K_mm);
my $token_mm_p =
$K_mm_p ? $rLL->[$K_mm_p]->[_TOKEN_] : SPACE;
# skip past a trailing comma
if ( $token_mm_p eq ',' ) {
$K_mm_p = $self->K_previous_code($K_mm_p);
$token_mm_p =
$K_mm_p ? $rLL->[$K_mm_p]->[_TOKEN_] : SPACE;
}
# if we find a closing paren, count the items and
# update shift_count_min
if ( $token_mm_p eq ')' ) {
my $seqno_mm_p = $rLL->[$K_mm_p]->[_TYPE_SEQUENCE_];
$self->count_default_sub_args( $item, $seqno_mm_p );
}
# NOTE: this could disagree with $_[n] usage; we
# ignore this for now.
return;
}
# Give up if = @_ is not preceded by a simple list
return;
}
# Give up if we find an indexed ref to $_[..]
elsif ( $token eq '$_' ) {
# Found $_: currently the search ends at '$_['
my $Kn = $self->K_next_code($KK);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] eq '[' ) {
# Give up unless we might be able to define a count
# when there are just references to $_[n] values
if ( !defined($at_index_max) || $shift_count ) {
return;
}
}
}
# Give up at something like '&func;'
elsif ( substr( $token, 0, 1 ) eq '&' ) {
my $Kn = $self->K_next_code($KK);
if ( $Kn && $rLL->[$Kn]->[_TOKEN_] ne '(' ) {
return;
}
}
else {
# continue search
}
}
#------------------------------
# look for '=shift;' or '=pop;'
#------------------------------
elsif ( $type eq 'k' ) {
if ( $is_shift_pop{$token} ) {
# look for 'shift;' and count as 1 arg
my $Kp = $self->K_next_code($KK);
return unless defined($Kp);
my $type_p = $rLL->[$Kp]->[_TYPE_];
my $token_p = $rLL->[$Kp]->[_TOKEN_];
# look for any of these with shift or pop:
# shift;
# shift @_;
# shift();
# shift(@_);
# remove any opening paren
my $in_parens;
if ( $token_p eq '(' ) {
$in_parens = 1;
$Kp = $self->K_next_code($Kp);
return unless defined($Kp);
$type_p = $rLL->[$Kp]->[_TYPE_];
$token_p = $rLL->[$Kp]->[_TOKEN_];
}
# look for '@_'
if ( $type_p eq 'i' || $type_p eq 't' ) {
# keep going if not @_
next if ( $token_p ne '@_' );
$Kp = $self->K_next_code($Kp);
return unless defined($Kp);
$type_p = $rLL->[$Kp]->[_TYPE_];
$token_p = $rLL->[$Kp]->[_TOKEN_];
}
# remove any closing paren
if ( $in_parens && $token_p eq ')' ) {
$Kp = $self->K_next_code($Kp);
return unless defined($Kp);
$type_p = $rLL->[$Kp]->[_TYPE_];
$token_p = $rLL->[$Kp]->[_TOKEN_];
}
# Just give up if this shift is not followed by a semicolon or
# closing brace or arrow. This is the safe thing to do to avoid
# false errors. There are too many ways for problems to arise.
# Especially if the next token is one of '||' '//' 'or'.
return
if ( $type_p ne ';' && $type_p ne '->' && $Kp ne $K_closing );
my $level = $rLL->[$KK]->[_LEVEL_];
# Give up on lower level shifts
return unless ( $level == $level_opening + 1 );
# If we get to the end without finding '(..) = @_;' then
# we will consider the count unreliable if we saw a 'pop'
# or if a previous block contained other statements.
$saw_pop_at_underscore ||= $token eq 'pop';
$shift_count++;
$semicolon_count_after_last_shift = 0;
# Save self name:
# '$self = shift'
# | | |
# $K_mm $K_m $KK
if ( $shift_count == 1 && !$self_name ) {
my $K_m = $self->K_previous_code($KK);
return unless ( defined($K_m) );
my $type_m = $rLL->[$K_m]->[_TYPE_];
# For something like: sub get_thing {shift->{thing}}
# use $_[0] as the name
if ( $type_p eq '->' ) {
if ( $type_m eq '{' || $type_m eq ';' ) {
$self_name = '$_[0]';
$item->{self_name} = $self_name;
}
}
else {
if ( $type_m eq '=' ) {
my $K_mm = $self->K_previous_code($K_m);
return unless defined($K_mm);
my $type_mm = $rLL->[$K_mm]->[_TYPE_];
my $token_mm = $rLL->[$K_mm]->[_TOKEN_];
my $seqno_mm = $rLL->[$K_mm]->[_TYPE_SEQUENCE_];
# check for $self in parens, like ($self)=shift
if ( $seqno_mm && $token_mm eq ')' ) {
my $Ko = $K_opening_container->{$seqno_mm};
$K_mm = $self->K_next_code($Ko);
if ($K_mm) {
$type_mm = $rLL->[$K_mm]->[_TYPE_];
$token_mm = $rLL->[$K_mm]->[_TOKEN_];
}
}
if ( $type_mm eq 'i' ) {
$self_name = $token_mm;
# we store self_name immediately because it will
# be needed even if we cannot get an arg count
$item->{self_name} = $self_name;
}
}
}
}
# Skip past any parens and @_; let the semicolon be seen next
if ( $KK < $Kp - 1 ) { $KK = $Kp - 1 }
}
elsif ( $token eq 'bless' ) {
# Could look for something like the following:
# my $self = bless {}, $class;
# my $self = bless {}, shift;
}
elsif ( $is_if_unless{$token} ) {
#-------------------------------
# RETURN: Optional early return.
#-------------------------------
# Give up and exit at 'if' or 'unless' if we have seen a few
# semicolons following the last 'shift'. The number '2' here
# has been found to work well.
if ( $semicolon_count_after_last_shift > 2 ) {
if ( !defined($max_arg_count)
|| $max_arg_count <= $shift_count )
{
if ( !$saw_pop_at_underscore
&& $KK >= $K_last_at_underscore )
{
$item->{shift_count_min} = $shift_count;
$item->{shift_count_max} = $shift_count;
}
return;
}
}
}
else {
}
}
# Check for a container boundary
elsif ( $rLL->[$KK]->[_TYPE_SEQUENCE_] ) {
if ( $is_opening_type{$type} ) {
my $seqno_test = $rLL->[$KK]->[_TYPE_SEQUENCE_];
#---------------------------------------------
# Skip past a sub declearation within this sub
#---------------------------------------------
if ( $self->[_ris_sub_block_]->{$seqno_test}
|| $self->[_ris_asub_block_]->{$seqno_test} )
{
my $Kc = $self->[_K_closing_container_]->{$seqno_test};
return if ( !$Kc );
return if ( $Kc <= $KK );
$KK = $Kc;
}
}
}
elsif ( $type eq ';' ) {
$semicolon_count_after_last_shift++;
}
# scan a quote for @_ and $_[
elsif ( $type eq 'Q' ) {
my $K_last_code = $self->K_previous_code($KK);
next unless defined($K_last_code);
my $K_last_type = $rLL->[$K_last_code]->[_TYPE_];
if ( $K_last_type eq 'Q' ) {
# starting in quote : use old interpolation value
}
elsif ( $is_re_match_op{$K_last_type} ) {
$in_interpolated_quote = 1;
}
# is not interpolated for leading operators: qw q tr y '
elsif ( $token =~ /^(qw | q[^qrx] | tr | [y\'] )/x ) {
$in_interpolated_quote = 0;
}
# is interpolated for everything else
else {
$in_interpolated_quote = 1;
}
# look for '@_' and '$_[' in an interpolated quote
next unless ($in_interpolated_quote);
my $pos;
$pos = index( $token, '@_' );
return
if ( $pos == 0
|| $pos > 0 && substr( $token, $pos - 1, 1 ) ne BACKSLASH );
$pos = index( $token, '$_[' );
return
if ( $pos == 0
|| $pos > 0 && substr( $token, $pos - 1, 1 ) ne BACKSLASH );
}
# scan here text for @_ and $_[
elsif ( $type eq 'h' ) {
next if ( !is_interpolated_here_doc($token) );
my $ix_line = $rLL->[$KK]->[_LINE_INDEX_];
my $ix_HERE = max( $ix_HERE_END, $ix_line );
( $ix_HERE_END, my $here_text ) = $self->get_here_text($ix_HERE);
if ($here_text) {
my $pos;
$pos = index( $here_text, '@_' );
return
if (
$pos == 0
|| ( $pos > 0
&& substr( $here_text, $pos - 1, 1 ) ne BACKSLASH )
);
$pos = index( $here_text, '$_[' );
return
if (
$pos == 0
|| ( $pos > 0
&& substr( $here_text, $pos - 1, 1 ) ne BACKSLASH )
);
}
}
else {
# continue search
}
} ## end while ( ++$KK < $K_closing)
#--------------------------------
# the whole file has been scanned
#--------------------------------
# if no shifts @_ and no references to @_, look for $[n]
if ( defined($at_index_max) && !$shift_count ) {
$shift_count = $at_index_max + 1;
# Create a self name like '$_[0]' if we can't find user-defined name.
# Then any sub calls with '$_[0]->' will be recognized as self
# calls by sub cross_check_sub_calls.
if ( !$self_name && $at_index_min == 0 ) {
$self_name = $dollar_underscore_zero_name->();
$self_name = '$_[0]' unless ($self_name);
$item->{self_name} = $self_name;
}
}
if ( !$saw_pop_at_underscore ) {
$item->{shift_count_min} = $shift_count;
$item->{shift_count_max} = $shift_count;
}
return;
} ## end sub count_sub_input_args
use constant DEBUG_RETURN_COUNT => 0;
sub count_sub_return_args {
my ( $self, $item ) = @_;
# Given: $item = hash ref with
# seqno => sequence number of a sub block
# Set values for these keys in '$item':
# return_count_min => minimum number of output args
# = undef if indeterminate, such as @list
# K_return_count_min => K value of the min
# return_count_max => maximum number of output args
# = undef if indeterminate, such as @list
# K_return_count_max => K value of the max
my $seqno_sub = $item->{seqno};
return unless ($seqno_sub);
my $rKlist = $self->[_rK_return_by_sub_seqno_]->{$seqno_sub};
return if ( !defined($rKlist) );
# loop over all return statements in this sub
my $rLL = $self->[_rLL_];
my $rhash = {};
my $rK_return_count_hash = {};
# retain old vars during transition phase
my $return_count_min;
my $return_count_max;
foreach ( @{$rKlist} ) {
my $K_return = $rLL->[$_]->[_TYPE_] eq 'b' ? $_ + 1 : $_;
## my $type = $rLL->[$K_return]->[_TYPE_];
my $token = $rLL->[$K_return]->[_TOKEN_];
if ( $token ne 'return' ) {
DEVEL_MODE && Fault("expecting 'return' but got $token\n");
last;
}
$rhash->{K_list_start} = $K_return;
$self->count_list_elements($rhash);
my $count = $rhash->{shift_count_max};
if ( !defined($count) ) {
$item->{return_count_indefinite} = $K_return;
$item->{return_count_max} = undef;
last;
}
# new count?
if ( !$rK_return_count_hash->{$count} ) {
$rK_return_count_hash->{$count} = $K_return;
}
# retain old vars during transition phase
# Note: using <= to match old results but could use <
if ( !defined($return_count_min) || $count <= $return_count_min ) {
$return_count_min = $count;
$item->{return_count_min} = $count;
## $item->{K_return_count_min} = $K_return;
}
# Note: using >= to match old results but could use >
if ( !defined($return_count_max) || $count >= $return_count_max ) {
$return_count_max = $count;
$item->{return_count_max} = $count;
$item->{K_return_count_max} = $K_return;
}
}
$item->{rK_return_count_hash} = $rK_return_count_hash;
if ( DEBUG_RETURN_COUNT > 1 ) {
my $min = $item->{return_count_min};
my $max = $item->{return_count_max};
$min = '*' unless defined($min);
$max = '*' unless defined($max);
print "DEBUG_RETURN: returning min=$min max=$max\n";
}
return;
} ## end sub count_sub_return_args
sub count_return_values_wanted {
my ( $self, $item ) = @_;
# Given: $item = a hash ref with
# seqno_list => sequence number the call arg list of a sub call
# Set value for this key in '$item':
# return_count_wanted => number of return items wanted from the call
# = undef if indeterminate, such as @list
# get the sequence number of the call arg list for this call
my $seqno_list = $item->{seqno_list};
return unless ($seqno_list);
# Give up if call is followed by a bound operator, for example
# my ( $fh, $tmpfile ) = $self->io()->tempfile( DIR => $dir );
# |
# ^--$Kc
my $rLL = $self->[_rLL_];
my $Kc = $self->[_K_closing_container_]->{$seqno_list};
my $Kc_n = $self->K_next_code($Kc);
if ($Kc_n) {
my $type_n = $rLL->[$Kc_n]->[_TYPE_];
my $ok = $type_n eq ';' || $is_closing_type{$type_n};
if ( !$ok && $type_n eq 'k' ) {
my $token_n = $rLL->[$Kc_n]->[_TOKEN_];
$ok ||= $is_if_unless{$token_n};
$ok ||= $is_and_or{$token_n};
}
return unless $ok;
}
my $Ko = $self->[_K_opening_container_]->{$seqno_list};
my $K_m = $self->K_previous_code($Ko);
my $K_mm = $self->K_previous_code($K_m);
return unless ( defined($K_mm) );
my $type_m = $rLL->[$K_m]->[_TYPE_];
my $token_m = $rLL->[$K_m]->[_TOKEN_];
my $type_mm = $rLL->[$K_mm]->[_TYPE_];
# start of backwards search depends on the call type...
# note: see var $rsub_call_paren_info_by_seqno in sub respace_tokens
my $K_equals;
# 'function('
if ( $type_m eq 'U' || $type_m eq 'w' ) {
$K_equals = $K_mm;
}
# '->function('
elsif ( $type_m eq 'i' && $type_mm eq '->' ) {
my $K_mmm = $self->K_previous_code($K_mm);
my $K_mm4 = $self->K_previous_code($K_mmm);
return unless defined($K_mm4);
my $type_mmm = $rLL->[$K_mmm]->[_TYPE_];
# something like '$self->function('
if ( $type_mmm eq 'i' ) {
$K_equals = $K_mm4;
}
# something complex like '$hash_of_objects{my_obj}->function('
else {
# TBD:
return;
}
}
# '&function('
elsif ( $type_m eq 'i' && substr( $token_m, 0, 1 ) eq '&' ) {
$K_equals = $K_mm;
}
# '$function->(' [ TODO: simple anonymous sub call, not used yet ]
elsif ( $type_m eq '->' && $type_mm eq 'i' ) {
my $K_mmm = $self->K_previous_code($K_mm);
$K_equals = $K_mmm;
}
# error
else {
DEVEL_MODE
&& Fault(
"unexpected call with type_m=$type_m token_m=$token_m type_mm=$type_mm\n"
);
return;
}
# look for '='
# Note that this ignores a return via a slice, like
# ($v1,$v2) =(f(x))[1,3]
# because this is an array return, and we just want explicit lists
if ( !$K_equals || $rLL->[$K_equals]->[_TYPE_] ne '=' ) {
return;
}
my $K_c = $self->K_previous_code($K_equals);
return unless ( defined($K_c) );
my $type_c = $rLL->[$K_c]->[_TYPE_];
my $token_c = $rLL->[$K_c]->[_TOKEN_];
if ( $token_c ne ')' ) {
# Handle @array = f(x) or $scalar=f(x), and things like
# $rhash->{vv} = f();
# $hash{vv} = f();
# $array[$index] = f();
if ( $is_closing_type{$type_c} ) {
# backup from the closing brace to any identifier
# Note: currently only going back one index, a sub could
# be written to handle more complex things
my $seqno_c = $rLL->[$K_c]->[_TYPE_SEQUENCE_];
return if ( !$seqno_c );
my $Ko_c = $self->[_K_opening_container_]->{$seqno_c};
return unless defined($Ko_c);
my $K_c_new = $self->K_previous_code($Ko_c);
return unless defined($K_c_new);
$type_c = $rLL->[$K_c_new]->[_TYPE_];
$token_c = $rLL->[$K_c_new]->[_TOKEN_];
if ( $type_c eq '->' ) {
$K_c_new = $self->K_previous_code($K_c_new);
return unless defined($K_c_new);
$type_c = $rLL->[$K_c_new]->[_TYPE_];
$token_c = $rLL->[$K_c_new]->[_TOKEN_];
}
}
if ( $type_c eq 'i' || $type_c eq 't' ) {
my $sigil = substr( $token_c, 0, 1 );
if ( $sigil eq '$' ) {
$item->{return_count_wanted} = 1;
$item->{want_scalar} = 1;
}
}
return;
}
# Count elements in (list of values)=f(x)
my $seqno_lhs = $rLL->[$K_c]->[_TYPE_SEQUENCE_];
return unless ($seqno_lhs);
my $rhash = {};
$rhash->{seqno_list} = $seqno_lhs;
$self->count_list_elements($rhash);
my $return_count_wanted = $rhash->{shift_count_max};
if ( DEBUG_RETURN_COUNT > 1 ) {
print "DEBUG_RETURN_COUNT: want $return_count_wanted\n";
}
$item->{return_count_wanted} = $return_count_wanted;
return;
} ## end sub count_return_values_wanted
sub sub_def_info_maker {
my ( $self, $rpackage_lookup_list, $rprelim_call_info ) = @_;
# Given:
# $rpackage_lookup_list = list with info for finding containing package
# $rprelim_call_info = hash ref with first try at call info
# Returns two hash references:
# \%sub_info_by_seqno,
# \%sub_seqno_by_key,
# where
# $sub_info_by_seqno{seqno} = {
# seqno => $seqno,
# package => $package,
# name => $name,
# seqno_list => $seqno of the paren list of args
# shift_count => number of args
# is_signature => true if seqno_list is a sub signature
# self_name => name of first arg
# }
# and
# $sub_seqno_by_key{'package::name'} = seqno;
# which gives the seqno for a sub name
# TODO: possible future update:
# package name for 'my' sub and anonymous sub will be parent sub seqno
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_sub_block = $self->[_ris_sub_block_];
#----------------------------------
# Main loop over subs to count args
#----------------------------------
my @package_stack = reverse( @{$rpackage_lookup_list} );
my ( $current_package, $Kbegin, $Kend ) = @{ pop @package_stack };
my %sub_info_by_seqno;
my %sub_seqno_by_key;
foreach my $seqno ( sort { $a <=> $b } keys %{$ris_sub_block} ) {
# update the current package
my $Ko = $K_opening_container->{$seqno};
while ( $Ko > $Kend && @package_stack ) {
( $current_package, $Kbegin, $Kend ) = @{ pop @package_stack };
}
my $block_type = $rblock_type_of_seqno->{$seqno};
#-----------------------------
# Get the sub name and package
#-----------------------------
# Examples of what we want to extract from '$block_type':
# $block_type $name
# 'sub setidentifier($)' => 'setidentifier'
# 'method setidentifier($)' => 'setidentifier'
# Examples:
# "sub hello", "sub hello($)", "sub hello ($)"
# There will be a single space after 'sub' but any number before
# prototype
my $name = $block_type;
my $pos_space = index( $block_type, SPACE );
if ( $pos_space > 0 ) {
$name = substr( $block_type, $pos_space + 1 );
}
my $pos_paren = index( $name, '(' );
my $prototype;
if ( $pos_paren > 0 ) {
$prototype = substr( $name, $pos_paren );
$name = substr( $name, 0, $pos_paren );
$name =~ s/\s+$//;
}
my $package = $current_package;
if ( ( index( $name, ':' ) >= 0 || index( $name, "'" ) >= 0 )
&& $name =~ /^(.*\W)(\w+)$/ )
{
$package = $1;
$name = $2;
$package =~ s/\'/::/g;
$package =~ s/::$//;
}
$package = 'main' unless ($package);
# Make a hash of info for this sub
my $lno = $rLL->[$Ko]->[_LINE_INDEX_] + 1;
my $item = {
seqno => $seqno,
package => $package,
name => $name,
line_number => $lno,
};
my $key = $package . '::' . $name;
# Set flag indicating if args may be expected to allow optimization
my $call_item = $rprelim_call_info->{$key};
$item->{max_arg_count} = $call_item->{max_arg_count};
# Add a count of the number of input args
$self->count_sub_input_args($item);
# Add a count of the number of return args
$self->count_sub_return_args($item);
# Store the sub info by sequence number
$sub_info_by_seqno{$seqno} = $item;
# and save the sub sequence number indexed by sub name
$sub_seqno_by_key{$key} = $seqno;
}
return ( \%sub_info_by_seqno, \%sub_seqno_by_key );
} ## end sub sub_def_info_maker
sub update_sub_call_paren_info {
my ( $self, $rpackage_lookup_list ) = @_;
# Given:
# $rpackage_lookup_list = list with info for finding containing package
# Update the hash of info about the call parameters with arg counts
# and package. It contains the sequence number of each paren and
# type of call, and we must add the arg count and package.
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $rsub_call_paren_info_by_seqno =
$self->[_rsub_call_paren_info_by_seqno_];
my @package_stack = reverse( @{$rpackage_lookup_list} );
my ( $current_package, $Kbegin, $Kend ) = @{ pop @package_stack };
my $is_dollar_underscore_zero = sub {
my ($K_closing_bracket) = @_;
# Given:
# $K_closing_bracket - index of a ']'
# Return:
# true of this is the end of '$_[0]'
# false otherwise
#
# return $_[0]->PP_decode_json(...
# |
# ---$K_closing_bracket
return unless ($K_closing_bracket);
my $seqno = $rLL->[$K_closing_bracket]->[_TYPE_SEQUENCE_];
return unless ($seqno);
my $Ko = $K_opening_container->{$seqno};
return unless defined($Ko);
my $Knum = $self->K_next_code($Ko);
return unless ( $Knum && $rLL->[$Knum]->[_TOKEN_] eq '0' );
my $Kc = $self->K_next_code($Knum);
return unless ( $Kc eq $K_closing_bracket );
my $K_p = $self->K_previous_code($Ko);
return unless ( $rLL->[$K_p]->[_TOKEN_] eq '$_' );
return 1;
}; ## end $is_dollar_underscore_zero = sub
#----------------------------------------------
# Loop over sequence numbers of all call parens
#----------------------------------------------
# parens are of the form f( ->f( &f( where 'f' is a bareword
# ^ ^ ^
# Note that we do not handle anonymous subs because it is not possible to
# connect them to the actual sub definition.
foreach
my $seqno ( sort { $a <=> $b } keys %{$rsub_call_paren_info_by_seqno} )
{
# update the current package
my $Ko = $K_opening_container->{$seqno};
while ( $Ko > $Kend && @package_stack ) {
( $current_package, $Kbegin, $Kend ) = @{ pop @package_stack };
}
# get the next call list
my $item = $rsub_call_paren_info_by_seqno->{$seqno};
my $name = $item->{token_m};
my $type_mm = $item->{type_mm};
# find function and package
my $is_ampersand_call;
# name will be like '&function' for an & call
if ( substr( $name, 0, 1 ) eq '&' ) {
$is_ampersand_call = 1;
$name = substr( $name, 1 );
}
my $call_type = $is_ampersand_call ? '&' : EMPTY_STRING;
my $caller_name = EMPTY_STRING;
my $class_name = EMPTY_STRING;
if ( $type_mm eq '->' ) {
$call_type = '->';
my $K_m = $self->K_previous_code($Ko);
my $K_mm = $self->K_previous_code($K_m);
my $K_mmm = $self->K_previous_code($K_mm);
if ( defined($K_mmm) ) {
my $type_mmm = $rLL->[$K_mmm]->[_TYPE_];
my $token_mmm = $rLL->[$K_mmm]->[_TOKEN_];
if ( $type_mmm eq 'i' ) {
$caller_name = $token_mmm;
}
elsif ( $type_mmm eq 'w' ) {
## A::B->do_something( $var1, $var2 );
## wwww->iiiiiiiiiiii{ iiiii, iiiii };
if ( index( $token_mmm, '::' ) >= 0 ) {
$class_name = $token_mmm;
$class_name =~ s/::$//;
}
}
elsif ( $token_mmm eq ']' ) {
if ( $is_dollar_underscore_zero->($K_mmm) ) {
$caller_name = '$_[0]';
}
}
else { }
}
}
# look for explicit package on name
my $package = $current_package;
if ( ( index( $name, ':' ) >= 0 || index( $name, "'" ) >= 0 )
&& $name =~ /^(.*\W)(\w+)$/ )
{
$package = $1;
$name = $2;
$package =~ s/\'/::/g;
$package =~ s/::$//;
}
else {
if ($class_name) {
$package = $class_name;
}
}
if ( !$package ) { $package = 'main' }
# count the args
my $rtype_count = $rtype_count_by_seqno->{$seqno};
my $arg_count = 0;
if ($rtype_count) {
my $comma_count = $rtype_count->{','};
my $fat_comma_count = $rtype_count->{'=>'};
if ($comma_count) { $arg_count += $comma_count }
if ($fat_comma_count) { $arg_count += $fat_comma_count }
}
# The comma count does not include any trailing comma, so add 1..
if ( !$arg_count ) {
# ..but not if parens are empty
my $Kc = $K_closing_container->{$seqno};
my $Kn = $Ko + 1;
if ( $Kn < $Kc ) {
my $type_n = $rLL->[$Kn]->[_TYPE_];
if ( $type_n eq 'b' ) {
$Kn += 1;
$type_n = $rLL->[$Kn]->[_TYPE_];
}
if ( $type_n eq '#' ) {
$Kn = $self->K_next_code($Ko);
}
if ( $Kn != $Kc ) { $arg_count += 1 }
}
}
else {
$arg_count += 1;
}
# The arg count is undefined if there are non-scalars in the list
$item->{seqno_list} = $seqno;
if ($arg_count) {
$item->{is_signature} = 0;
$item->{shift_count_min} = 0;
$item->{self_name} = EMPTY_STRING;
$self->count_list_elements($item);
$arg_count = $item->{shift_count_min};
}
# get the return count expected for this call by scanning to the left
$self->count_return_values_wanted($item);
# update the hash of info for this item
my $line_number = $rLL->[$Ko]->[_LINE_INDEX_] + 1;
$item->{arg_count} = $arg_count;
$item->{package} = $package;
$item->{name} = $name;
$item->{line_number} = $line_number;
$item->{call_type} = $call_type;
$item->{caller_name} = $caller_name;
$item->{class_name} = $class_name;
}
return;
} ## end sub update_sub_call_paren_info
{
#-----------------------------------------------------
# Sub to look at first use of $self in a specified sub
#-----------------------------------------------------
my %self_call_cache;
my %is_oo_call_cache;
sub initialize_self_call_cache {
my $self = shift;
# must be called once per file before first call to sub self_call_check
%self_call_cache = ();
%is_oo_call_cache = ();
return;
} ## end sub initialize_self_call_cache
sub self_call_check {
my ( $self, $seqno_sub ) = @_;
# Try to decide if a sub call with '$self->' is a call to an
# internal sub by looking at the first '$self' usage.
# Given:
# $seqno_sub = sequence number of sub to be checked
# Return:
# $is_self_call = true if this is an internal $self-> call
# based on the first $self in the sub.
# $is_oo_call = true if a call '$self->' appears to be
# within an OO framework which hides the $self arg.
# This uses the variable _rK_first_self_by_sub_seqno_ which
# is set by sub respace_tokens.
my $is_self_call = $self_call_cache{$seqno_sub};
my $is_oo_call = $is_oo_call_cache{$seqno_sub};
if ( !defined($is_self_call) ) {
$is_self_call = 0;
$is_oo_call = 0;
my $rLL = $self->[_rLL_];
my $K_first_self =
$self->[_rK_first_self_by_sub_seqno_]->{$seqno_sub};
# an index K stored by respace_tokens may be 1 low
$K_first_self++
if ( $K_first_self
&& $rLL->[$K_first_self]->[_TYPE_] eq 'b' );
my $Kn = $self->K_next_code($K_first_self);
my $type_n = $Kn ? $rLL->[$Kn]->[_TYPE_] : 'b';
#-----------------------------------------
# Try 3a. if "$self->" then assume OO call
#-----------------------------------------
if ( $type_n eq '->' ) {
$is_self_call = 1;
# Also set a flag to reduce the call arg count by 1
# because it looks this is an OO system which
# hides the $self call arg.
# NOTE: to be sure, we could scan all sub args
# in advance to check that all first sub args
# are not named $self
$is_oo_call = 1;
}
#--------------------------
# Try 3b. "$self = bless"
#--------------------------
elsif ( $type_n eq '=' ) {
my $Knn = $self->K_next_code($Kn);
$is_self_call = $Knn && $rLL->[$Knn]->[_TOKEN_] eq 'bless';
}
# none of the above
else { }
$self_call_cache{$seqno_sub} = $is_self_call;
$is_oo_call_cache{$seqno_sub} = $is_oo_call;
}
return ( $is_self_call, $is_oo_call );
} ## end sub self_call_check
}
use constant DEBUG_SELF => 0;
sub cross_check_sub_calls {
my ($self) = @_;
# This routine looks for issues for these parameters:
# --dump-mismatched-args
# --warn-mismatched-args
# --dump-mismatched-returns
# --warn-mismatched-returns
# It returns a hash of values with any warnings found
my $rLL = $self->[_rLL_];
# The mismatched-args checks are indicated by these letters:
# a = both method and non-method calls to a sub
# - even for two subs in a different package
# o = overcount: call arg counts exceed number expected by a sub
# u = undercount: call arg counts less than number expected by a sub
# - except if expecting N or less (N=4 by default)
# i = indeterminate: expected number of args was not determined
my %call_arg_issue_note = (
a => "both method and non-method calls to a sub",
o => "excess args passed",
u => "fewer args than expected passed",
i => "indeterminate sub arg count",
);
my %do_mismatched_call_type = %call_arg_issue_note;
my $mismatched_arg_undercount_cutoff = 0;
my $mismatched_arg_overcount_cutoff = 0;
my $ris_mismatched_call_excluded_name = {};
# The mismatched-returns checks are indicated by these letters:
my %return_issue_note = (
x => "want array but no return seen",
y => "want scalar but no return seen",
o => "want array with excess count",
u => "want array with count not matched by sub",
s => "want scalar but sub only returns arrays with count >1",
);
my %do_mismatched_return_type = %return_issue_note;
my $ris_mismatched_return_excluded_name = {};
# initialize a cache used for efficiency
$self->initialize_self_call_cache();
my $is_dump =
$rOpts->{'dump-mismatched-args'} || $rOpts->{'dump-mismatched-returns'};
# initialize if not in a dump mode
if ( !$is_dump ) {
%do_mismatched_call_type = %{$rwarn_mismatched_arg_types};
$mismatched_arg_undercount_cutoff =
$rOpts->{'warn-mismatched-arg-undercount-cutoff'};
$mismatched_arg_overcount_cutoff =
$rOpts->{'warn-mismatched-arg-overcount-cutoff'};
$ris_mismatched_call_excluded_name =
$ris_warn_mismatched_arg_excluded_name;
%do_mismatched_return_type = %{$rwarn_mismatched_return_types};
$ris_mismatched_return_excluded_name =
$ris_warn_mismatched_return_excluded_name;
}
# hardwired name exclusions
$ris_mismatched_call_excluded_name->{AUTOLOAD} = 1;
$ris_mismatched_call_excluded_name->{DESTROY} = 1;
my $K_opening_container = $self->[_K_opening_container_];
my $rK_package_list = $self->[_rK_package_list_];
my $ris_my_sub_by_seqno = $self->[_ris_my_sub_by_seqno_];
my $rsub_call_paren_info_by_seqno =
$self->[_rsub_call_paren_info_by_seqno_];
my $rK_bless_by_sub_seqno = $self->[_rK_bless_by_sub_seqno_];
#----------------------------
# Make a package lookup table
#----------------------------
my $rpackage_lists = $self->package_info_maker($rK_package_list);
my $rpackage_lookup_list = $rpackage_lists->{'rpackage_lookup_list'};
#-------------------------------------------
# Update sub call paren info with arg counts
#-------------------------------------------
$self->update_sub_call_paren_info($rpackage_lookup_list);
#----------------------------------
# Preliminary min and max call args
#----------------------------------
# This is preliminary because some of the calls will eventually be
# rejected if they appear to be to external objects. This info is
# needed to optimize the sub arg search in the case of zero args.
my %upper_bound_call_info;
foreach my $seqno ( keys %{$rsub_call_paren_info_by_seqno} ) {
my $rcall_item = $rsub_call_paren_info_by_seqno->{$seqno};
my $call_type = $rcall_item->{call_type};
my $package = $rcall_item->{package};
my $name = $rcall_item->{name};
my $arg_count = $rcall_item->{arg_count};
my $key = $package . '::' . $name;
next unless defined($arg_count);
if ( $call_type eq '->' ) {
$arg_count += 1;
## $upper_bound_call_info{$key}->{method_call_count}++;
}
else {
## $upper_bound_call_info{$key}->{direct_call_count}++;
}
my $max = $upper_bound_call_info{$key}->{max_arg_count};
my $min = $upper_bound_call_info{$key}->{min_arg_count};
if ( !defined($max) || $arg_count > $max ) {
$upper_bound_call_info{$key}->{max_arg_count} = $arg_count;
}
if ( !defined($min) || $arg_count < $min ) {
$upper_bound_call_info{$key}->{min_arg_count} = $arg_count;
}
}
#-----------------------------------
# Get arg counts for sub definitions
#-----------------------------------
my ( $rsub_info_by_seqno, $rsub_seqno_by_key ) =
$self->sub_def_info_maker( $rpackage_lookup_list,
\%upper_bound_call_info );
# Hash to hold combined info for subs and calls
my %common_hash;
#---------------------------------------------
# First split the calls into direct and method
#---------------------------------------------
my @method_call_seqnos;
foreach my $seqno ( keys %{$rsub_call_paren_info_by_seqno} ) {
my $rcall_item = $rsub_call_paren_info_by_seqno->{$seqno};
my $package = $rcall_item->{package};
my $name = $rcall_item->{name};
my $key = $package . '::' . $name;
if ( $rcall_item->{call_type} eq '->' ) {
push @method_call_seqnos, $seqno;
}
else {
push @{ $common_hash{$key}->{direct_calls} }, $rcall_item;
}
}
#----------------------------------------------
# Now split method calls into self and external
#----------------------------------------------
my @debug_warnings;
foreach my $seqno (@method_call_seqnos) {
my $rcall_item = $rsub_call_paren_info_by_seqno->{$seqno};
my $package = $rcall_item->{package};
my $name = $rcall_item->{name};
my $caller_name = $rcall_item->{caller_name};
my $class_name = $rcall_item->{class_name};
my $key_receiver_sub = $package . '::' . $name;
my $is_self_call;
# Find the sub which contains this call
my $seqno_sub_parent = $self->parent_sub_seqno($seqno);
if ($seqno_sub_parent) {
my $item = $rsub_info_by_seqno->{$seqno_sub_parent};
if ($item) {
my $key_parent_sub = $item->{package} . '::' . $item->{name};
my $parent_self_name = $item->{self_name};
my $caller_is_dollar_self = $caller_name eq '$self';
# Decide if this method call is to an internal sub:
# Try 1 and Try 2 are general, for any object name
# Try 3 and Try 4 are guesses for common uses of '$self'
#------------------------------------------------
# Try 1: Parent sub self name matches caller name
#------------------------------------------------
if ($parent_self_name) {
# and the only calls to parent sub (if any) are arrow calls.
if (
$parent_self_name eq $caller_name
&& ( !$common_hash{$key_parent_sub}->{direct_calls}
|| $caller_is_dollar_self )
)
{
$is_self_call = 1;
}
}
#---------------------------------------------------------
# Try 2. See if the name was blessed in the containing sub
#---------------------------------------------------------
if ( !$is_self_call ) {
my $item_self = $item->{self_name};
$item_self = 'undef' unless $item_self;
my $rK_bless_list =
$rK_bless_by_sub_seqno->{$seqno_sub_parent};
if ($rK_bless_list) {
my $Ko = $K_opening_container->{$seqno};
foreach my $blessing ( @{$rK_bless_list} ) {
# Index K and blessed name were stored with sub.
# $K_blessed may be 1 token before K of '$self'
my ( $K_blessed, $name_blessed ) = @{$blessing};
# name of blessed object must match
next if ( $name_blessed ne $caller_name );
# keyword 'bless' must be at top sub level. We have
# to back up 1 token in case $self is in parens.
my $Kp = $self->K_previous_code($K_blessed);
next if ( !$Kp );
my $parent_seqno = $self->parent_seqno_by_K($Kp);
next
if (!$parent_seqno
|| $parent_seqno != $seqno_sub_parent );
# bless must be before the call
next if ( $K_blessed > $Ko );
$is_self_call = 1;
last;
}
}
}
#-------------------------------------------------------
# Try 3. Caller is '$self'; look at first '$self' in sub
#-------------------------------------------------------
if ( !$is_self_call && $caller_is_dollar_self ) {
( $is_self_call, $rcall_item->{is_oo_call} ) =
$self->self_call_check($seqno_sub_parent);
}
#-------------------------------------------------------------
# Try 4. caller is '$self': receiver='$self', '$class', '$_[0]'
#-------------------------------------------------------------
if ( !$is_self_call && $caller_is_dollar_self ) {
my $seqno_sub_called =
$rsub_seqno_by_key->{$key_receiver_sub};
if ($seqno_sub_called) {
my $item_called =
$rsub_info_by_seqno->{$seqno_sub_called};
my $receiver = $item_called->{self_name};
#------------------------------------------------
# Try 4a: receiver has some recognized self names
#------------------------------------------------
if (
$receiver
&& ( $receiver eq $caller_name
|| $receiver eq '$class'
|| $receiver eq '$_[0]' )
)
{
$is_self_call = 1;
}
#-----------------------------------
# Try 4b: check for a recursive call
#-----------------------------------
else {
$is_self_call =
$seqno_sub_called == $seqno_sub_parent;
}
}
}
if ( DEBUG_SELF
&& !$is_self_call
&& $caller_is_dollar_self
&& $seqno_sub_parent )
{
my $Ko_sub = $K_opening_container->{$seqno_sub_parent};
my $ln_parent = $rLL->[$Ko_sub]->[_LINE_INDEX_] + 1;
my $Ko = $K_opening_container->{$seqno};
my $ln = $rLL->[$Ko]->[_LINE_INDEX_] + 1;
my $parent_self = $item->{self_name};
my $receiver_self = 'missing';
my $ln_receiver = 'undef';
my $seqno_sub_called =
$rsub_seqno_by_key->{$key_receiver_sub};
if ($seqno_sub_called) {
my $item_called =
$rsub_info_by_seqno->{$seqno_sub_called};
$receiver_self = $item_called->{self_name};
my $Ko_receiver =
$K_opening_container->{$seqno_sub_called};
$ln_receiver = $rLL->[$Ko_receiver]->[_LINE_INDEX_] + 1;
}
# use DEBUG_SELF=3 to see missing subs
else {
next if ( DEBUG_SELF < 3 );
}
# use DEBUG_SELF=2 to see undef-self-undef
next
if ( DEBUG_SELF < 2 && !$parent_self && !$receiver_self );
if ( !$parent_self ) { $parent_self = 'undef' }
if ( !$receiver_self ) { $receiver_self = 'undef' }
push @debug_warnings,
{
Ko => $Ko,
caller_name => $caller_name,
parent_self => $parent_self,
receiver_self => $receiver_self,
sub_called => $name,
line_number => $ln,
ln_parent => $ln_parent,
ln_receiver => $ln_receiver,
};
}
}
}
# Save this method call as either an internal (self) or external call
if ($is_self_call) {
push @{ $common_hash{$key_receiver_sub}->{self_calls} },
$rcall_item;
}
else {
# mark calls made by unknown (non-self) objects, we can't track
# them, but we can track calls at the class level.
if ( !$class_name ) {
$rcall_item->{is_unknown_object_call} = 1;
}
}
}
if ( DEBUG_SELF && @debug_warnings ) {
@debug_warnings = sort { $a->{Ko} <=> $b->{Ko} } @debug_warnings;
my $output_string = EMPTY_STRING;
foreach my $item (@debug_warnings) {
## my $caller_name = $item->{caller_name};
my $parent_self = $item->{parent_self};
my $receiver_self = $item->{receiver_self};
my $sub_called = $item->{sub_called};
my $line_number = $item->{line_number};
my $ln_parent = $item->{ln_parent};
my $ln_receiver = $item->{ln_receiver};
$output_string .=
"$line_number: \$self->$sub_called in parent line $ln_parent with self=$parent_self to receiver line $ln_receiver with self=$receiver_self\n";
}
warning($output_string);
}
#-------------------------------
# Loop to merge prototype counts
#-------------------------------
foreach my $key ( keys %common_hash ) {
my $seqno_sub = $rsub_seqno_by_key->{$key};
next if ( !defined($seqno_sub) );
my $rsub_item = $rsub_info_by_seqno->{$seqno_sub};
next if ( !$rsub_item->{prototype} );
my $item = $common_hash{$key};
my $rdirect_calls = $item->{direct_calls};
my $rself_calls = $item->{self_calls};
my $num_direct = defined($rdirect_calls) ? @{$rdirect_calls} : 0;
my $num_self = defined($rself_calls) ? @{$rself_calls} : 0;
# Use prototype values if given and all calls are direct
# Otherwise, ignore the prototype.
next if ($num_self);
next if ( !$num_direct );
my $shift_count_min = $rsub_item->{prototype_count_min};
my $shift_count_max = $rsub_item->{prototype_count_max};
if ($num_self) {
if ( defined($shift_count_min) ) { $shift_count_min++ }
if ( defined($shift_count_max) ) { $shift_count_max++ }
}
# For calls with '&' to subs with prototypes, use the upper bound of
# the prototype max and the max found by scanning the script.
my $shift_count_max_amp = $shift_count_max;
if ( defined($shift_count_max) ) {
my $standard_max = $rsub_item->{shift_count_max};
if ( !defined($standard_max) || $standard_max > $shift_count_max ) {
$shift_count_max_amp = $standard_max;
}
}
$rsub_item->{shift_count_max_amp} = $shift_count_max_amp;
# overwrite values found by scanning the script with prototype values
$rsub_item->{shift_count_min} = $shift_count_min;
$rsub_item->{shift_count_max} = $shift_count_max;
}
#--------------------------------------------------------------
# Loop over all sub calls to compare call and return arg counts
#--------------------------------------------------------------
foreach my $seqno ( keys %{$rsub_call_paren_info_by_seqno} ) {
my $rcall_item = $rsub_call_paren_info_by_seqno->{$seqno};
# Skip method calls by unknown objects
next if ( $rcall_item->{is_unknown_object_call} );
my $arg_count = $rcall_item->{arg_count};
my $return_count_wanted = $rcall_item->{return_count_wanted};
my $want_scalar = $rcall_item->{want_scalar};
my $package = $rcall_item->{package};
my $name = $rcall_item->{name};
my $call_type = $rcall_item->{call_type};
my $key = $package . '::' . $name;
my ( $shift_count_min, $shift_count_max, $self_name );
my ( $return_count_min, $return_count_max, $return_count_indefinite );
my ($rK_return_count_hash);
# look for the sub ..
my $seqno_sub = $rsub_seqno_by_key->{$key};
my $rK_return_list;
my $saw_wantarray;
if ( defined($seqno_sub) ) {
my $rsub_item = $rsub_info_by_seqno->{$seqno_sub};
$saw_wantarray =
defined( $self->[_rK_wantarray_by_sub_seqno_]->{$seqno_sub} );
# skip 'my' subs for now, they need special treatment. If
# anonymous subs are added, 'my' subs could also be added then.
if ( !$ris_my_sub_by_seqno->{$seqno_sub} ) {
$common_hash{$key}->{rsub_item} = $rsub_item;
$shift_count_min = $rsub_item->{shift_count_min};
$shift_count_max = $rsub_item->{shift_count_max};
if ( $call_type eq '&' && $rsub_item->{prototype} ) {
$shift_count_max = $rsub_item->{shift_count_max_amp};
}
$self_name = $rsub_item->{self_name};
$return_count_min = $rsub_item->{return_count_min};
$return_count_max = $rsub_item->{return_count_max};
$return_count_indefinite =
$rsub_item->{return_count_indefinite};
$rK_return_list =
$self->[_rK_return_by_sub_seqno_]->{$seqno_sub};
## $common_hash{$key}->{rK_return_list} = $rK_return_list;
$rK_return_count_hash = $rsub_item->{rK_return_count_hash};
}
}
#------------------------------------
# compare caller/sub input arg counts
#------------------------------------
if ( defined($shift_count_min) && defined($arg_count) ) {
if ( $call_type eq '->' && !$rcall_item->{is_oo_call} ) {
$arg_count += 1;
}
my $excess = $arg_count - $shift_count_min;
my $max = $common_hash{$key}->{max_arg_count};
my $min = $common_hash{$key}->{min_arg_count};
if ( !defined($max) || $arg_count > $max ) {
$common_hash{$key}->{max_arg_count} = $arg_count;
}
if ( !defined($min) || $arg_count < $min ) {
$common_hash{$key}->{min_arg_count} = $arg_count;
}
if ( $excess < 0 ) {
push @{ $common_hash{$key}->{under_count} }, $rcall_item;
}
elsif ( $excess > 0 ) {
if ( defined($shift_count_max) ) {
$excess = $arg_count - $shift_count_max;
if ( $excess > 0 ) {
push @{ $common_hash{$key}->{over_count} }, $rcall_item;
}
}
}
else {
## $excess = 0
}
}
#---------------------------------------------
# compare caller/sub return counts if possible
#---------------------------------------------
# rhs check: only check subs returning finite lists (i.e. not '@list');
next if ($return_count_indefinite);
# lhs check: only check when a finite return list is wanted
next if ( !$return_count_wanted );
# ignore scalar if wantarray seen
next if ( $want_scalar && $saw_wantarray );
# update min-max want ranges for the output report
my $max = $common_hash{$key}->{want_count_max};
my $min = $common_hash{$key}->{want_count_min};
if ( !defined($max) || $return_count_wanted > $max ) {
$common_hash{$key}->{want_count_max} = $return_count_wanted;
}
if ( !defined($min) || $return_count_wanted < $min ) {
$common_hash{$key}->{want_count_min} = $return_count_wanted;
}
# return issue 'x': want array but no return seen
# return issue 'y': want scalar but no return seen
if ( !defined($rK_return_list) ) {
if ($want_scalar) {
push @{ $common_hash{$key}->{return_issues}->{y} }, $rcall_item;
}
else {
push @{ $common_hash{$key}->{return_issues}->{x} }, $rcall_item;
}
}
# safety check
elsif ( !defined($return_count_max) ) {
# shouldn't happen-should be defined if $rK_return_list is defined
DEVEL_MODE && Fault("return_count_max should be defined here\n");
}
# check for exact match
elsif ( $return_count_wanted == $return_count_max ) {
## ok
}
# return issue 'o': overwant
elsif ( $return_count_wanted > $return_count_max ) {
# but no error for scalar request of 1 when max 0 returned
if ( !$want_scalar ) {
push @{ $common_hash{$key}->{return_issues}->{o} }, $rcall_item;
}
}
# if want less than max...
else {
# issue 'u': want array for an unmatched count less than max
# issue 's': want scalar but all return counts are >1
if ( defined($rK_return_count_hash) ) {
my $K_return = $rK_return_count_hash->{$return_count_wanted};
if ( !defined($K_return) ) {
if ($want_scalar) {
push @{ $common_hash{$key}->{return_issues}->{s} },
$rcall_item;
}
else {
push @{ $common_hash{$key}->{return_issues}->{u} },
$rcall_item;
}
}
}
else {
## safety check, shouldn't happen
DEVEL_MODE && Fault("return count hash not defined\n");
}
}
}
#------------------------------------
# Construct one-line warning messages
#------------------------------------
my @call_arg_warnings;
my @return_warnings;
my $max_shift_count_with_undercount = 0;
my $number_of_undercount_warnings = 0;
# variables with information about a sub needed for warning output:
my (
$lno, $name,
$shift_count_min, $shift_count_max,
$min_arg_count, $max_arg_count,
$return_count_min, $return_count_max,
$want_count_min, $want_count_max,
);
my $push_call_arg_warning = sub {
my ( $letter, $note ) = @_;
my $shift_count = $shift_count_min;
if ( $shift_count_min ne '*' && $shift_count_min ne $shift_count_max ) {
$shift_count = "$shift_count_min-$shift_count_max";
}
my $output_line =
"$lno:$letter:$name:$shift_count:$min_arg_count:$max_arg_count: $note\n";
push @call_arg_warnings,
{
line_number => $lno,
letter => $letter,
name => $name,
output_line => $output_line,
};
return;
}; ## end $push_call_arg_warning = sub
my $push_return_warning = sub {
my ( $letter, $note, $lno_return ) = @_;
my $return_count = $return_count_min;
if ( $return_count_min ne '*'
&& $return_count_min ne $return_count_max )
{
$return_count = "$return_count_min-$return_count_max";
}
my $output_line =
"$lno_return:$letter:$name:$return_count:$want_count_min:$want_count_max: $note\n";
push @return_warnings,
{
line_number => $lno_return,
letter => $letter,
name => $name,
output_line => $output_line,
};
return;
}; ## end $push_return_warning = sub
#-------------------
# Loop over each sub
#-------------------
foreach my $key ( keys %common_hash ) {
my $item = $common_hash{$key};
# Check for mixed method/direct calls:
my $rsub_item = $item->{rsub_item};
next unless defined($rsub_item);
$name = $rsub_item->{name};
$lno = $rsub_item->{line_number};
## my $rK_return_list = $item->{rK_return_list};
my $rself_calls = $item->{self_calls};
my $rdirect_calls = $item->{direct_calls};
my $num_self = defined($rself_calls) ? @{$rself_calls} : 0;
my $num_direct = defined($rdirect_calls) ? @{$rdirect_calls} : 0;
## my $K_return_count_min = $rsub_item->{K_return_count_min};
my $K_return_count_max = $rsub_item->{K_return_count_max};
$shift_count_min = $rsub_item->{shift_count_min};
$shift_count_max = $rsub_item->{shift_count_max};
$return_count_min = $rsub_item->{return_count_min};
$return_count_max = $rsub_item->{return_count_max};
$min_arg_count = $item->{min_arg_count};
$max_arg_count = $item->{max_arg_count};
$want_count_min = $item->{want_count_min};
$want_count_max = $item->{want_count_max};
# change undefs to '*' for the output text
foreach (
$shift_count_min, $shift_count_max,
$return_count_min, $return_count_max,
$min_arg_count, $max_arg_count,
$want_count_min, $want_count_max,
)
{
$_ = '*' unless defined($_);
}
#-----------------------------------------------------------------
# Make a one-line message for each mismatch call issue of this sub
#-----------------------------------------------------------------
my $rover_count = $item->{over_count};
my $runder_count = $item->{under_count};
my $num_over_count = defined($rover_count) ? @{$rover_count} : 0;
my $num_under_count = defined($runder_count) ? @{$runder_count} : 0;
#--------------------------------------------------
# issue 'a': subs with both self-> and direct calls
#--------------------------------------------------
if ( $num_self && $num_direct && $do_mismatched_call_type{'a'} ) {
my $letter = 'a';
my $lines_self_calls = stringify_line_range($rself_calls);
my $lines_direct_calls = stringify_line_range($rdirect_calls);
my $self_name = $rsub_item->{self_name};
if ( !defined($self_name) ) { $self_name = EMPTY_STRING }
my $ess1 = $num_self > 1 ? 's' : EMPTY_STRING;
my $ess2 = $num_direct > 1 ? 's' : EMPTY_STRING;
my $str = $self_name . '->call' . $ess1;
my $note =
"$num_self $str($lines_self_calls) and $num_direct call$ess2($lines_direct_calls)";
$push_call_arg_warning->( $letter, $note );
}
#---------------------------------------------------------
# Ignore calls to a sub which was not defined in this file
#---------------------------------------------------------
if ( !defined($rsub_item) ) {
next;
}
#-------------------------------------------------------------------
# issue 'i': indeterminate. Could not determine a specific arg count
#-------------------------------------------------------------------
if ( $shift_count_min eq '*' ) {
my $letter = 'i';
if ( $do_mismatched_call_type{$letter} ) {
# skip *:*:* (no disagreement - call counts also indeterminate)
next
if ( $shift_count_min eq $min_arg_count
&& $shift_count_min eq $max_arg_count );
my $note = $call_arg_issue_note{$letter};
$push_call_arg_warning->( $letter, $note );
}
}
# otherwise check call arg counts
else {
#---------------------
# issue 'o': overcount
#---------------------
if ( $num_over_count
&& $do_mismatched_call_type{'o'}
&& $shift_count_max >= $mismatched_arg_overcount_cutoff )
{
my $letter = 'o';
my $line_range = stringify_line_range($rover_count);
my $total = $num_direct + $num_self;
my $note = $call_arg_issue_note{$letter};
$note .=
$total > 1
? " at $num_over_count of $total calls ($line_range)"
: " at $line_range";
$push_call_arg_warning->( $letter, $note );
}
#----------------------
# issue 'u': undercount
#----------------------
if ($num_under_count) {
if ( $shift_count_min > $max_shift_count_with_undercount ) {
$max_shift_count_with_undercount = $shift_count_min;
}
# Skip the warning for small lists with undercount
if ( $do_mismatched_call_type{'u'}
&& $shift_count_min >= $mismatched_arg_undercount_cutoff )
{
my $letter = 'u';
my $line_range = stringify_line_range($runder_count);
my $total = $num_direct + $num_self;
my $note = $call_arg_issue_note{$letter};
$note .=
$total > 1
? " at $num_under_count of $total calls ($line_range)"
: " at $line_range";
$number_of_undercount_warnings++;
$push_call_arg_warning->( $letter, $note );
}
}
}
#-------------------------------------------------------------------
# Make a one-line message for each mismatch return issue of this sub
#-------------------------------------------------------------------
my $return_issues = $item->{return_issues};
if ($return_issues) {
foreach my $letter ( keys %return_issue_note ) {
next if ( !$do_mismatched_return_type{$letter} );
my $rissues = $return_issues->{$letter};
my $number = defined($rissues) ? @{$rissues} : 0;
next unless ($number);
my $line_range = stringify_line_range($rissues);
my $total = $num_direct + $num_self;
my $note = $return_issue_note{$letter};
$note .=
$total > 1
? " at $number of $total calls ($line_range)"
: " at $line_range";
# The one-line message shows the line number of the return
# with the maximum count if there are returns. If no returns
# (types 'x' and 'y') it shows the first line of the sub ($lno).
my $lno_return =
defined($K_return_count_max)
? $rLL->[$K_return_count_max]->[_LINE_INDEX_] + 1
: $lno;
$push_return_warning->( $letter, $note, $lno_return );
} ## end loop to save one line for mismatched returns
}
}
#-----------------------------------------------
# Make the sorted/filtered call arg issue report
#-----------------------------------------------
my $rcall_arg_warnings = sort_warnings( \@call_arg_warnings );
$rcall_arg_warnings = filter_excluded_names( $rcall_arg_warnings,
$ris_mismatched_call_excluded_name );
my $call_arg_warning_output = EMPTY_STRING;
my $call_arg_hint = EMPTY_STRING;
if ( @{$rcall_arg_warnings} ) {
my $header =
"Issue types are 'a'=arrow mismatch 'u'=undercount 'o'=overcount";
if ($is_dump) { $header .= " 'i'=indeterminate" }
$call_arg_warning_output = <<EOM;
$header
Line:Issue:Sub:#args:Min:Max: note
EOM
foreach ( @{$rcall_arg_warnings} ) {
$call_arg_warning_output .= $_->{output_line};
}
if ( !$is_dump && $number_of_undercount_warnings ) {
my $wmauc_min = $max_shift_count_with_undercount + 1;
$call_arg_hint = <<EOM;
Note: use -wmauc=$wmauc_min or greater to prevent undercount warnings in this file
or put parentheses around default sub args and use -wmauc=0
EOM
$call_arg_warning_output .= $call_arg_hint;
}
}
#---------------------------------------------
# Make the sorted/filtered return issue report
#---------------------------------------------
my $rreturn_warnings = sort_warnings( \@return_warnings );
$rreturn_warnings = filter_excluded_names( $rreturn_warnings,
$ris_mismatched_return_excluded_name );
my $return_warning_output = EMPTY_STRING;
if ( @{$rreturn_warnings} ) {
$return_warning_output = <<EOM;
Issue types 'u'=under-want 'o'=over-want 'x','y'=no return 's'=scalar-array mix
Line:Issue:Sub:#Returned:Min_wanted:Max_wanted: note
EOM
foreach ( @{$rreturn_warnings} ) {
$return_warning_output .= $_->{output_line};
}
}
return {
call_arg_warning_output => $call_arg_warning_output,
return_warning_output => $return_warning_output,
};
} ## end sub cross_check_sub_calls
sub sort_warnings {
my ($rwarnings) = @_;
# Given:
# $rwarnigns = ref to list of warning info hashes
# Return updated $rwarnings
# - Sorted by line number
if ( @{$rwarnings} ) {
# sort by line number
$rwarnings = [
sort {
$a->{line_number} <=> $b->{line_number}
|| $a->{letter} cmp $b->{letter}
} @{$rwarnings}
];
}
return $rwarnings;
} ## end sub sort_warnings
sub stringify_line_range {
my ($rcalls) = @_;
# Given:
# $rcalls = ref to list of call info
# Return:
# $string = single line of text with just the line range
my $string = EMPTY_STRING;
if ( $rcalls && @{$rcalls} ) {
my @sorted =
sort { $a->{line_number} <=> $b->{line_number} } @{$rcalls};
my $num = @sorted;
my $lno_beg = $sorted[0]->{line_number};
my $lno_end = $sorted[-1]->{line_number};
if ( $num == 1 ) {
$string = "line $lno_beg";
}
elsif ( $num == 2 ) {
$string = "lines $lno_beg,$lno_end";
}
else {
$string = "lines $lno_beg..$lno_end";
}
}
return $string;
} ## end sub stringify_line_range
sub initialize_warn_mismatched {
# a - mismatched arrow operator calls
# o - overcount
# u - undercount
$rwarn_mismatched_arg_types =
initialize_warn_hash( 'warn-mismatched-arg-types', 1, [qw( a o u )] );
$ris_warn_mismatched_arg_excluded_name =
make_excluded_name_hash('warn-mismatched-arg-exclusion-list');
# x - want array but no return seen
# o - want array with excess count
# u - want array with unmatched count
# y - want scalar but no return seen
# s - want scalar but only arrays with count > 1 returned
$rwarn_mismatched_return_types =
initialize_warn_hash( 'warn-mismatched-return-types',
1, [qw( x o u y s )] );
$ris_warn_mismatched_return_excluded_name =
make_excluded_name_hash('warn-mismatched-return-exclusion-list');
return;
} ## end sub initialize_warn_mismatched
sub warn_mismatched {
my ($self) = @_;
# process both --warn-mismatched-args and --warn-mismatched-returns,
my $rhash = $self->cross_check_sub_calls();
my $wma_key = 'warn-mismatched-args';
if ( $rOpts->{$wma_key} ) {
my $output_lines = $rhash->{call_arg_warning_output};
if ($output_lines) {
chomp $output_lines;
warning(<<EOM);
Begin scan for --$wma_key
$output_lines
End scan for --$wma_key
EOM
}
}
my $wmr_key = 'warn-mismatched-returns';
if ( $rOpts->{$wmr_key} ) {
my $output_lines = $rhash->{return_warning_output};
if ($output_lines) {
chomp $output_lines;
warning(<<EOM);
Begin scan for --$wmr_key
$output_lines
End scan for --$wmr_key
EOM
}
}
return;
} ## end sub warn_mismatched
sub dump_mismatched_args {
my ($self) = @_;
# process a --dump-mismatched-args command
my $rhash = $self->cross_check_sub_calls();
my $output_string = $rhash->{call_arg_warning_output};
if ($output_string) {
my $input_stream_name = get_input_stream_name();
chomp $output_string;
print {*STDOUT} <<EOM;
$input_stream_name: output for --dump-mismatched-args
$output_string
EOM
}
return;
} ## end sub dump_mismatched_args
sub dump_mismatched_returns {
my ($self) = @_;
# process a --dump-mismatched-returns command
my $rhash = $self->cross_check_sub_calls();
my $output_string = $rhash->{return_warning_output};
if ($output_string) {
my $input_stream_name = get_input_stream_name();
chomp $output_string;
print {*STDOUT} <<EOM;
$input_stream_name: output for --dump-mismatched-returns
$output_string
EOM
}
return;
} ## end sub dump_mismatched_returns
sub check_for_old_break {
my ( $self, $KK, $rkeep_break_hash, $rbreak_hash ) = @_;
# This sub is called to help implement flags:
# --keep-old-breakpoints-before and --keep-old-breakpoints-after
# Given:
# $KK = index of a token,
# $rkeep_break_hash = user control for --keep-old-...
# $rbreak_hash = hash of tokens where breaks are requested
# Set $rbreak_hash as follows if a user break is requested:
# = 1 make a hard break (flush the current batch)
# best for something like leading commas (-kbb=',')
# = 2 make a soft break (keep building current batch)
# best for something like leading ->
my $rLL = $self->[_rLL_];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
# non-container tokens use the type as the key
if ( !$seqno ) {
my $type = $rLL->[$KK]->[_TYPE_];
if ( $rkeep_break_hash->{$type} ) {
$rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
}
}
# container tokens use the token as the key
else {
my $token = $rLL->[$KK]->[_TOKEN_];
my $flag = $rkeep_break_hash->{$token};
if ($flag) {
my $match = $flag eq '1' || $flag eq '*';
# check for special matching codes
if ( !$match ) {
if ( $token eq '(' || $token eq ')' ) {
$match = $self->match_paren_control_flag( $seqno, $flag );
}
elsif ( $token eq '{' || $token eq '}' ) {
# These tentative codes 'b' and 'B' for brace types are
# placeholders for possible future brace types. They
# are not documented and may be changed.
my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno};
if ( $flag eq 'b' ) { $match = $block_type }
elsif ( $flag eq 'B' ) { $match = !$block_type }
else {
## unknown code - no match
DEVEL_MODE && Fault(<<EOM);
unexpected code '$flag' for --keep-old-breakpoints: expecting 'b' or 'B'
EOM
}
}
else {
# no match
}
}
if ($match) {
my $type = $rLL->[$KK]->[_TYPE_];
$rbreak_hash->{$KK} = $is_soft_keep_break_type{$type} ? 2 : 1;
}
}
}
return;
} ## end sub check_for_old_break
sub keep_old_line_breaks {
my ($self) = @_;
# Called once per file to find and mark any old line breaks which
# should be kept. We will be translating the input hashes into
# token indexes.
# A flag is set as follows:
# = 1 make a hard break (flush the current batch)
# best for something like leading commas (-kbb=',')
# = 2 make a soft break (keep building current batch)
# best for something like leading ->
my $rLL = $self->[_rLL_];
my $rKrange_code_without_comments =
$self->[_rKrange_code_without_comments_];
my $rbreak_before_Kfirst = $self->[_rbreak_before_Kfirst_];
my $rbreak_after_Klast = $self->[_rbreak_after_Klast_];
my $rbreak_container = $self->[_rbreak_container_];
#----------------------------------------
# Apply --break-at-old-method-breakpoints
#----------------------------------------
# This code moved here from sub break_lists to fix b1120
if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
my $type = $rLL->[$Kfirst]->[_TYPE_];
my $token = $rLL->[$Kfirst]->[_TOKEN_];
# leading '->' use a value of 2 which causes a soft
# break rather than a hard break
if ( $type eq '->' ) {
# ignore -bom after an opening token ( a syntax error, b1475 )
my $Kp = $self->K_previous_nonblank($Kfirst);
next if ( !defined($Kp) );
next if ( $is_opening_type{ $rLL->[$Kp]->[_TYPE_] } );
# ignore -bom if this does not look like a method call; c426
my $Kn = $self->K_next_nonblank($Kfirst);
next if ( !defined($Kn) );
my $token_n = $rLL->[$Kn]->[_TYPE_];
next if ( $token_n eq '{' || $token_n eq '[' );
$rbreak_before_Kfirst->{$Kfirst} = 2;
}
# leading ')->' use a special flag to insure that both
# opening and closing parens get opened
# Fix for b1120: only for parens, not braces
elsif ( $token eq ')' ) {
my $Kn = $self->K_next_nonblank($Kfirst);
next if ( !defined($Kn) );
next if ( $Kn > $Klast );
next if ( $rLL->[$Kn]->[_TYPE_] ne '->' );
my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
next if ( !$seqno );
# Note: in previous versions there was a fix here to avoid
# instability between conflicting -bom and -pvt or -pvtc flags.
# The fix skipped -bom for a small line difference. But this
# was troublesome, and instead the fix has been moved to
# sub set_vertical_tightness_flags where priority is given to
# the -bom flag over -pvt and -pvtc flags. Both opening and
# closing paren flags are involved because even though -bom only
# requests breaking before the closing paren, automated logic
# opens the opening paren when the closing paren opens.
# Relevant cases are b977, b1215, b1270, b1303
# ignore -bom if this does not look like a method call; c426
$Kn = $self->K_next_nonblank($Kn);
next if ( !defined($Kn) );
my $token_n = $rLL->[$Kn]->[_TYPE_];
next if ( $token_n eq '{' || $token_n eq '[' );
$rbreak_container->{$seqno} = 1;
}
else {
# not a special case
}
}
}
#---------------------------------------------------------------------
# Apply --keep-old-breakpoints-before and --keep-old-breakpoints-after
#---------------------------------------------------------------------
return unless ( %keep_break_before_type || %keep_break_after_type );
foreach my $item ( @{$rKrange_code_without_comments} ) {
my ( $Kfirst, $Klast ) = @{$item};
$self->check_for_old_break( $Kfirst, \%keep_break_before_type,
$rbreak_before_Kfirst );
$self->check_for_old_break( $Klast, \%keep_break_after_type,
$rbreak_after_Klast );
}
return;
} ## end sub keep_old_line_breaks
sub weld_containers {
my ($self) = @_;
# Called once per file to do any welding operations requested by --weld*
# flags.
# This count is used to eliminate needless calls for weld checks elsewhere
$total_weld_count = 0;
return if ( $rOpts->{'indent-only'} );
return unless ($rOpts_add_newlines);
# Important: sub 'weld_cuddled_blocks' must be called before
# sub 'weld_nested_containers'. This is because the cuddled option needs to
# use the original _LEVEL_ values of containers, but the weld nested
# containers changes _LEVEL_ of welded containers.
# Here is a good test case to be sure that both cuddling and welding
# are working and not interfering with each other: <<snippets/ce_wn1.in>>
# perltidy -wn -ce
# if ($BOLD_MATH) { (
# $labels, $comment,
# join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
# ) } else { (
# &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
# $after
# ) }
$self->weld_cuddled_blocks() if ( %{$rcuddled_block_types} );
if ( $rOpts->{'weld-nested-containers'} ) {
$self->weld_nested_containers();
$self->weld_nested_quotes();
}
#-------------------------------------------------------------
# All welding is done. Finish setting up weld data structures.
#-------------------------------------------------------------
my $rLL = $self->[_rLL_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rweld_len_right_at_K = $self->[_rweld_len_right_at_K_];
my @K_multi_weld;
my @keys = keys %{$rK_weld_right};
$total_weld_count = @keys;
# First pass to process binary welds.
# This loop is processed in unsorted order for efficiency.
foreach my $Kstart (@keys) {
my $Kend = $rK_weld_right->{$Kstart};
# An error here would be due to an incorrect initialization introduced
# in one of the above weld routines, like sub weld_nested.
if ( $Kend <= $Kstart ) {
Fault("Bad weld link: Kend=$Kend <= Kstart=$Kstart\n")
if (DEVEL_MODE);
next;
}
# Set weld values for all tokens this welded pair
foreach ( $Kstart + 1 .. $Kend ) {
$rK_weld_left->{$_} = $Kstart;
}
foreach my $Kx ( $Kstart .. $Kend - 1 ) {
$rK_weld_right->{$Kx} = $Kend;
$rweld_len_right_at_K->{$Kx} =
$rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
}
# Remember the leftmost index of welds which continue to the right
if ( defined( $rK_weld_right->{$Kend} )
&& !defined( $rK_weld_left->{$Kstart} ) )
{
push @K_multi_weld, $Kstart;
}
}
# Second pass to process chains of welds (these are rare).
# This has to be processed in sorted order.
if (@K_multi_weld) {
my $Kend = -1;
foreach my $Kstart ( sort { $a <=> $b } @K_multi_weld ) {
# Skip any interior K which was originally missing a left link
next if ( $Kstart <= $Kend );
# Find the end of this chain
$Kend = $rK_weld_right->{$Kstart};
my $Knext = $rK_weld_right->{$Kend};
while ( defined($Knext) ) {
if ( $Knext <= $Kend ) {
## shouldn't happen: K should increase for right weld
DEVEL_MODE && Fault(<<EOM);
Error: Knext=$Knext = rK_weld_right->{$Kend} is not increasing
EOM
last;
}
$Kend = $Knext;
$Knext = $rK_weld_right->{$Kend};
} ## end while ( defined($Knext) )
# Set weld values this chain
foreach ( $Kstart + 1 .. $Kend ) {
$rK_weld_left->{$_} = $Kstart;
}
foreach my $Kx ( $Kstart .. $Kend - 1 ) {
$rK_weld_right->{$Kx} = $Kend;
$rweld_len_right_at_K->{$Kx} =
$rLL->[$Kend]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kx]->[_CUMULATIVE_LENGTH_];
}
}
}
return;
} ## end sub weld_containers
sub weld_cuddled_blocks {
my ($self) = @_;
# Called once per file to handle cuddled formatting
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# This routine implements the -cb flag by finding the appropriate
# closing and opening block braces and welding them together.
return unless ( %{$rcuddled_block_types} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rbreak_container = $self->[_rbreak_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $ris_cuddled_closing_brace = $self->[_ris_cuddled_closing_brace_];
my $K_closing_container = $self->[_K_closing_container_];
# A stack to remember open chains at all levels: This is a hash rather than
# an array for safety because negative levels can occur in files with
# errors. This allows us to keep processing with negative levels.
# $in_chain{$level} = [$chain_type, $type_sequence];
my %in_chain;
my $CBO = $rOpts->{'cuddled-break-option'};
# loop over structure items to find cuddled pairs
my $level = 0;
foreach my $KK ( @{ $self->[_rK_sequenced_token_list_] } ) {
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# _rK_sequenced_token_list_. Or an error has been introduced in
# the loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK")
if (DEVEL_MODE);
next;
}
# NOTE: we must use the original levels here. They can get changed
# by sub 'weld_nested_containers', so this routine must be called
# before sub 'weld_nested_containers'.
my $last_level = $level;
$level = $rtoken_vars->[_LEVEL_];
if ( $level < $last_level ) { $in_chain{$last_level} = undef }
elsif ( $level > $last_level ) { $in_chain{$level} = undef }
else {
# level unchanged
}
# We are only looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
if ( $token eq '{' ) {
my $block_type = $rblock_type_of_seqno->{$type_sequence};
if ( !$block_type ) {
# patch for unrecognized block types which may not be labeled
my $Kp = $self->K_previous_code($KK);
next unless $Kp;
$block_type = $rLL->[$Kp]->[_TOKEN_];
}
if ( $in_chain{$level} ) {
# we are in a chain and are at an opening block brace.
# See if we are welding this opening brace with the previous
# block brace. Get their identification numbers:
my $closing_seqno = $in_chain{$level}->[1];
my $opening_seqno = $type_sequence;
# The preceding block must be on multiple lines so that its
# closing brace will start a new line.
if ( !$ris_broken_container->{$closing_seqno}
&& !$rbreak_container->{$closing_seqno} )
{
next unless ( $CBO == 2 );
$rbreak_container->{$closing_seqno} = 1;
}
# We can weld the closing brace to its following word ..
my $Ko = $K_closing_container->{$closing_seqno};
my $Kon;
if ( defined($Ko) ) {
$Kon = $self->K_next_nonblank($Ko);
}
# ..unless it is a comment
if ( defined($Kon) && $rLL->[$Kon]->[_TYPE_] ne '#' ) {
# OK to weld these two tokens...
$rK_weld_right->{$Ko} = $Kon;
$rK_weld_left->{$Kon} = $Ko;
# Set flag that we want to break the next container
# so that the cuddled line is balanced.
$rbreak_container->{$opening_seqno} = 1
if ($CBO);
# Remember which braces are cuddled.
# The closing brace is used to set adjusted indentations.
# The opening brace is not yet used but might eventually
# be needed in setting adjusted indentation.
$ris_cuddled_closing_brace->{$closing_seqno} = 1;
}
}
else {
# We are not in a chain. Start a new chain if we see the
# starting block type.
if ( $rcuddled_block_types->{$block_type} ) {
$in_chain{$level} = [ $block_type, $type_sequence ];
}
else {
$block_type = '*';
$in_chain{$level} = [ $block_type, $type_sequence ];
}
}
}
elsif ( $token eq '}' ) {
if ( $in_chain{$level} ) {
# We are in a chain at a closing brace. See if this chain
# continues..
my $Knn = $self->K_next_code($KK);
next unless $Knn;
my $chain_type = $in_chain{$level}->[0];
my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
if (
$rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
)
{
# Note that we do not weld yet because we must wait until
# we we are sure that an opening brace for this follows.
$in_chain{$level}->[1] = $type_sequence;
}
else { $in_chain{$level} = undef }
}
}
else {
# not a curly brace
}
}
return;
} ## end sub weld_cuddled_blocks
sub find_nested_pairs {
my ($self) = @_;
# This routine is called once per file to do preliminary work needed for
# the --weld-nested option. This information is also needed for adding
# semicolons.
# Returns:
# \@nested_pairs = ref to a list in which each item is a ref to
# to the sequence numbers of two nested containers:
# [ $seqno_inner, $seqno_outer ]
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
# We define an array of pairs of nested containers
my @nested_pairs;
# Names of calling routines can either be marked as 'i' or 'w',
# and they may invoke a sub call with an '->'. We will consider
# any consecutive string of such types as a single unit when making
# weld decisions. We also allow a leading !
my $is_name_type = {
'i' => 1,
'w' => 1,
'U' => 1,
'->' => 1,
'!' => 1,
};
# Loop over all closing container tokens
foreach my $inner_seqno ( keys %{$K_closing_container} ) {
my $K_inner_closing = $K_closing_container->{$inner_seqno};
# See if it is immediately followed by another, outer closing token
my $K_outer_closing = $K_inner_closing + 1;
$K_outer_closing += 1
if ( $K_outer_closing < $Num
&& $rLL->[$K_outer_closing]->[_TYPE_] eq 'b' );
next if ( $K_outer_closing >= $Num );
my $outer_seqno = $rLL->[$K_outer_closing]->[_TYPE_SEQUENCE_];
next if ( !$outer_seqno );
my $token_outer_closing = $rLL->[$K_outer_closing]->[_TOKEN_];
next if ( !$is_closing_token{$token_outer_closing} );
# Simple filter: No commas or semicolons in the outer container
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$outer_seqno};
if ($rtype_count) {
next if ( $rtype_count->{','} || $rtype_count->{';'} );
}
# Now we have to check the opening tokens.
my $K_outer_opening = $K_opening_container->{$outer_seqno};
my $K_inner_opening = $K_opening_container->{$inner_seqno};
next if ( !defined($K_outer_opening) );
next if ( !defined($K_inner_opening) );
my $inner_blocktype = $rblock_type_of_seqno->{$inner_seqno};
my $outer_blocktype = $rblock_type_of_seqno->{$outer_seqno};
# Verify that the inner opening token is the next container after the
# outer opening token.
my $K_io_check = $rK_next_seqno_by_K->[$K_outer_opening];
next unless defined($K_io_check);
if ( $K_io_check != $K_inner_opening ) {
# The inner opening container does not immediately follow the outer
# opening container, but we may still allow a weld if they are
# separated by a sub signature. For example, we may have something
# like this, where $K_io_check may be at the first 'x' instead of
# 'io'. So we need to hop over the signature and see if we arrive
# at 'io'.
# oo io
# | x x |
# $obj->then( sub ( $code ) {
# ...
# return $c->render(text => '', status => $code);
# } );
# | |
# ic oc
next if ( !$inner_blocktype || $inner_blocktype ne 'sub' );
next if $rLL->[$K_io_check]->[_TOKEN_] ne '(';
my $seqno_signature = $rLL->[$K_io_check]->[_TYPE_SEQUENCE_];
next unless defined($seqno_signature);
my $K_signature_closing = $K_closing_container->{$seqno_signature};
next unless defined($K_signature_closing);
my $K_test = $rK_next_seqno_by_K->[$K_signature_closing];
next
unless ( defined($K_test) && $K_test == $K_inner_opening );
# OK, we have arrived at 'io' in the above diagram. We should put
# a limit on the length or complexity of the signature here. There
# is no perfect way to do this, one way is to put a limit on token
# count. For consistency with older versions, we should allow a
# signature with a single variable to weld, but not with
# multiple variables. A single variable as in 'sub ($code) {' can
# have a $Kdiff of 2 to 4, depending on spacing.
# But two variables like 'sub ($v1,$v2) {' can have a diff of 4 to
# 7, depending on spacing. So to keep formatting consistent with
# previous versions, we will also avoid welding if there is a comma
# in the signature.
my $Kdiff = $K_signature_closing - $K_io_check;
next if ( $Kdiff > 4 );
# backup comma count test; but we cannot get here with Kdiff<=4
my $rtc = $self->[_rtype_count_by_seqno_]->{$seqno_signature};
next if ( $rtc && $rtc->{','} );
}
# Yes .. this is a possible nesting pair.
# They can be separated by a small amount.
my $K_diff = $K_inner_opening - $K_outer_opening;
# Count the number of nonblank characters separating them.
# Note: the $nonblank_count includes the inner opening container
# but not the outer opening container, so it will be >= 1.
if ( $K_diff < 0 ) {
# Shouldn't happen
DEVEL_MODE
&& Fault(
"unexpected negative index diff=$K_diff = Kio-Koo =$K_inner_opening - $K_outer_opening"
);
next;
}
my $nonblank_count = 0;
my $type;
my $is_name;
# Here is an example of a long identifier chain which counts as a
# single nonblank here (this spans about 10 K indexes):
# if ( !Boucherot::SetOfConnections->new->handler->execute(
# ^--K_o_o ^--K_i_o
# @array) )
my $Kn_first = $K_outer_opening;
my $Kn_last_nonblank;
my $saw_comment;
foreach my $Kn ( $K_outer_opening + 1 .. $K_inner_opening ) {
next if ( $rLL->[$Kn]->[_TYPE_] eq 'b' );
if ( !$nonblank_count ) { $Kn_first = $Kn }
if ( $Kn eq $K_inner_opening ) { $nonblank_count++; last; }
$Kn_last_nonblank = $Kn;
# skip chain of identifier tokens
my $last_type = $type;
my $last_is_name = $is_name;
$type = $rLL->[$Kn]->[_TYPE_];
if ( $type eq '#' ) { $saw_comment = 1; last }
$is_name = $is_name_type->{$type};
next if ( $is_name && $last_is_name );
# do not count a possible leading - of bareword hash key
next if ( $type eq 'm' && !$last_type );
$nonblank_count++;
last if ( $nonblank_count > 2 );
}
# Do not weld across a comment .. fix for c058.
next if ($saw_comment);
# Patch for b1104: do not weld to a paren preceded by sort/map/grep
# because the special line break rules may cause a blinking state
if ( defined($Kn_last_nonblank)
&& $rLL->[$K_inner_opening]->[_TOKEN_] eq '('
&& $rLL->[$Kn_last_nonblank]->[_TYPE_] eq 'k' )
{
my $token = $rLL->[$Kn_last_nonblank]->[_TOKEN_];
# Turn off welding at sort/map/grep (
if ( $is_sort_map_grep{$token} ) { $nonblank_count = 10 }
}
my $token_oo = $rLL->[$K_outer_opening]->[_TOKEN_];
if (
# 1: adjacent opening containers, like: do {{
$nonblank_count == 1
# 2. anonymous sub + prototype or sig: )->then( sub ($code) {
# ... but it seems best not to stack two structural blocks, like
# this
# sub make_anon_with_my_sub { sub {
# because it probably hides the structure a little too much.
|| ( $inner_blocktype
&& $inner_blocktype eq 'sub'
&& $rLL->[$Kn_first]->[_TOKEN_] eq 'sub'
&& !$outer_blocktype )
# 3. short item following opening paren, like: fun( yyy (
|| $nonblank_count == 2 && $token_oo eq '('
# 4. weld around fat commas, if requested (git #108), such as
# elf->call_method( method_name_foo => {
|| ( $type eq '=>'
&& $nonblank_count <= 3
&& %weld_fat_comma_rules
&& $weld_fat_comma_rules{$token_oo} )
)
{
push @nested_pairs,
[ $inner_seqno, $outer_seqno, $K_inner_closing ];
}
next;
}
#------------------------------------
# Make the final list of nested pairs
#------------------------------------
# The weld routine expects the pairs in order in the form
# [$seqno_inner, $seqno_outer]
# And they must be in the same order as the inner closing tokens
# (otherwise, welds of three or more adjacent tokens will not work). The K
# value of this inner closing token has temporarily been stored for
# sorting.
@nested_pairs =
# Drop the K index after sorting (it would cause trouble downstream)
map { [ $_->[0], $_->[1] ] }
# Sort on the K values
sort { $a->[2] <=> $b->[2] } @nested_pairs;
return \@nested_pairs;
} ## end sub find_nested_pairs
sub match_paren_control_flag {
my ( $self, $seqno, $flag, ($rLL) ) = @_;
# Input parameters:
# $seqno = sequence number of the container (should be paren)
# $flag = the flag which defines what matches
# $rLL = an optional alternate token list needed for respace operations
# Decide if this paren is excluded by user request:
# undef matches no parens
# '*' matches all parens
# 'k' matches only if the previous nonblank token is a perl builtin
# keyword (such as 'if', 'while'),
# 'K' matches if 'k' does not, meaning if the previous token is not a
# keyword.
# 'f' matches if the previous token is a function other than a keyword.
# 'F' matches if 'f' does not.
# 'w' matches if either 'k' or 'f' match.
# 'W' matches if 'w' does not.
$rLL = $self->[_rLL_] unless ( defined($rLL) );
return 0 unless ( defined($flag) );
return 0 if $flag eq '0';
return 1 if $flag eq '1';
return 1 if $flag eq '*';
return 0 unless ($seqno);
my $K_opening = $self->[_K_opening_container_]->{$seqno};
return unless ( defined($K_opening) );
my ( $is_f, $is_k, $is_w );
my $Kp = $self->K_previous_nonblank( $K_opening, $rLL );
if ( defined($Kp) ) {
my $type_p = $rLL->[$Kp]->[_TYPE_];
# keyword?
$is_k = $type_p eq 'k';
# function call?
$is_f = $self->[_ris_function_call_paren_]->{$seqno};
# either keyword or function call?
$is_w = $is_k || $is_f;
}
my $match;
if ( $flag eq 'k' ) { $match = $is_k }
elsif ( $flag eq 'K' ) { $match = !$is_k }
elsif ( $flag eq 'f' ) { $match = $is_f }
elsif ( $flag eq 'F' ) { $match = !$is_f }
elsif ( $flag eq 'w' ) { $match = $is_w }
elsif ( $flag eq 'W' ) { $match = !$is_w }
else {
## no match
DEVEL_MODE && Fault(<<EOM);
unexpected code '$flag' in sub match_paren_control_flag: expecting one of kKfFwW
EOM
}
return $match;
} ## end sub match_paren_control_flag
sub is_excluded_weld {
my ( $self, $KK, $is_leading ) = @_;
# Decide if this weld is excluded by user request
# Given:
# $KK = index of this weld token
# $is_leading = true if this will the outer token of a weld
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $rflags = $weld_nested_exclusion_rules{$token};
return 0 unless ( defined($rflags) );
my $flag = $is_leading ? $rflags->[0] : $rflags->[1];
return 0 unless ( defined($flag) );
return 1 if $flag eq '*';
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
return $self->match_paren_control_flag( $seqno, $flag );
} ## end sub is_excluded_weld
# hashes to simplify welding logic
my %type_ok_after_bareword;
my %has_tight_paren;
BEGIN {
# types needed for welding RULE 6
my @q = qw# => -> { ( [ #;
@type_ok_after_bareword{@q} = (1) x scalar(@q);
# these types do not 'like' to be separated from a following paren
@q = qw( w i q Q G C Z U );
@has_tight_paren{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_WELD => 0;
sub setup_new_weld_measurements {
my ( $self, $Kouter_opening, $Kinner_opening ) = @_;
# Define quantities to check for excess line lengths when welded.
# Called by sub 'weld_nested_containers' and sub 'weld_nested_quotes'
# Given:
# ($Kouter_opening, $Kinner_opening) = indexes of outer and inner opening
# containers to be welded
# Returns these variables:
# $new_weld_ok = true (new weld ok) or false (do not start new weld)
# $starting_indent = starting indentation
# $starting_lentot = starting cumulative length
# $msg = diagnostic message for debugging
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
my $starting_level;
my $starting_ci;
my $starting_lentot;
my $maximum_text_length;
my $msg = EMPTY_STRING;
my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast_uu ) = @{$rK_range};
#-------------------------------------------------------------------------
# We now define a reference index, '$Kref', from which to start measuring
# This choice turns out to be critical for keeping welds stable during
# iterations, so we go through a number of STEPS...
#-------------------------------------------------------------------------
# STEP 1: Our starting guess is to use measure from the first token of the
# current line. This is usually a good guess.
my $Kref = $Kfirst;
# STEP 2: See if we should go back a little farther
my $Kprev = $self->K_previous_nonblank($Kfirst);
if ( defined($Kprev) ) {
# Avoid measuring from between an opening paren and a previous token
# which should stay close to it ... fixes b1185
my $token_oo = $rLL->[$Kouter_opening]->[_TOKEN_];
my $type_prev = $rLL->[$Kprev]->[_TYPE_];
if ( $Kouter_opening == $Kfirst
&& $token_oo eq '('
&& $has_tight_paren{$type_prev} )
{
$Kref = $Kprev;
}
# Back up and count length from a token like '=' or '=>' if -lp
# is used (this fixes b520)
# ...or if a break is wanted before there
elsif ($rOpts_line_up_parentheses
|| $want_break_before{$type_prev} )
{
# If there are other sequence items between the start of this line
# and the opening token in question, then do not include tokens on
# the previous line in length calculations. This check added to
# fix case b1174 which had a '?' on the line
my $no_previous_seq_item = $Kref == $Kouter_opening
|| $rK_next_seqno_by_K->[$Kref] == $Kouter_opening;
if ( $no_previous_seq_item
&& substr( $type_prev, 0, 1 ) eq '=' )
{
$Kref = $Kprev;
# Fix for b1144 and b1112: backup to the first nonblank
# character before the =>, or to the start of its line.
if ( $type_prev eq '=>' ) {
my $iline_prev = $rLL->[$Kprev]->[_LINE_INDEX_];
my $rK_range_prev = $rlines->[$iline_prev]->{_rK_range};
my ( $Kfirst_prev, $Klast_prev_uu ) = @{$rK_range_prev};
my $nb_count = 0;
foreach my $KK ( reverse( $Kfirst_prev .. $Kref - 1 ) ) {
next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
$Kref = $KK;
# Continue at type 'w' to get previous dash. Example:
# -classification => [ qw(
# This fixes b1502.
last if ( $nb_count || $rLL->[$KK]->[_TYPE_] ne 'w' );
$nb_count++;
}
}
}
}
else {
# do not need to backup
}
}
# STEP 3: Now look ahead for a ternary and, if found, use it.
# This fixes case b1182.
# Also look for a ')' at the same level and, if found, use it.
# This fixes case b1224.
if ( $Kref < $Kouter_opening ) {
my $Knext = $rK_next_seqno_by_K->[$Kref];
my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
my $Knext_last = $Knext;
while ( $Knext && $Knext < $Kouter_opening ) {
if ( $rLL->[$Knext]->[_LEVEL_] == $level_oo ) {
if ( $is_ternary{ $rLL->[$Knext]->[_TYPE_] }
|| $rLL->[$Knext]->[_TOKEN_] eq ')' )
{
$Kref = $Knext;
last;
}
}
$Knext = $rK_next_seqno_by_K->[$Knext];
if ( $Knext <= $Knext_last ) {
## shouldn't happen: $rK_next_seqno_by_K is corrupted
DEVEL_MODE && Fault(<<EOM);
Knext should not increase: Knext_last=$Knext_last >= Knext=$Knext
EOM
last;
}
$Knext_last = $Knext;
} ## end while ( $Knext && $Knext ...)
}
# fix c1468 - do not measure from a leading opening block brace -
# which is not a one-line block
if ( $Kref < $Kouter_opening
&& $Kref == $Kfirst
&& $rLL->[$Kref]->[_TOKEN_] eq '{' )
{
my $seqno_ref = $rLL->[$Kref]->[_TYPE_SEQUENCE_];
if ($seqno_ref) {
my $block_type = $self->[_rblock_type_of_seqno_]->{$seqno_ref};
if ($block_type) {
my $Kref_c = $self->[_K_closing_container_]->{$seqno_ref};
my $ln_ref_o = $rLL->[$Kref]->[_LINE_INDEX_];
my $ln_ref_c = $rLL->[$Kref_c]->[_LINE_INDEX_];
if ( $ln_ref_c > $ln_ref_o ) {
$Kref = $self->K_next_nonblank($Kref);
}
}
}
}
# Define the starting measurements we will need
$starting_lentot =
$Kref <= 0 ? 0 : $rLL->[ $Kref - 1 ]->[_CUMULATIVE_LENGTH_];
$starting_level = $rLL->[$Kref]->[_LEVEL_];
$starting_ci = $rLL->[$Kref]->[_CI_LEVEL_];
$maximum_text_length = $maximum_text_length_at_level[$starting_level] -
$starting_ci * $rOpts_continuation_indentation;
# STEP 4: Switch to using the outer opening token as the reference
# point if a line break before it would make a longer line.
# Fixes case b1055 and is also an alternate fix for b1065.
my $starting_level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
if ( $Kref < $Kouter_opening ) {
my $starting_ci_oo = $rLL->[$Kouter_opening]->[_CI_LEVEL_];
my $lentot_oo = $rLL->[ $Kouter_opening - 1 ]->[_CUMULATIVE_LENGTH_];
my $maximum_text_length_oo =
$maximum_text_length_at_level[$starting_level_oo] -
$starting_ci_oo * $rOpts_continuation_indentation;
# The excess length to any cumulative length K = lenK is either
# $excess = $lenk - ($lentot + $maximum_text_length), or
# $excess = $lenk - ($lentot_oo + $maximum_text_length_oo),
# so the worst case (maximum excess) corresponds to the configuration
# with minimum value of the sum: $lentot + $maximum_text_length
if ( $lentot_oo + $maximum_text_length_oo <
$starting_lentot + $maximum_text_length )
{
$Kref = $Kouter_opening;
$starting_level = $starting_level_oo;
$starting_ci = $starting_ci_oo;
$starting_lentot = $lentot_oo;
$maximum_text_length = $maximum_text_length_oo;
}
}
my $new_weld_ok = 1;
# STEP 5, fix b1020: Avoid problem areas with the -wn -lp combination. The
# combination -wn -lp -dws -naws does not work well and can cause blinkers.
# It will probably only occur in stress testing. For this situation we
# will only start a new weld if we start at a 'good' location.
# - Added 'if' to fix case b1032.
# - Require blank before certain previous characters to fix b1111.
# - Add ';' to fix case b1139
# - Convert from '$ok_to_weld' to '$new_weld_ok' to fix b1162.
# - relaxed constraints for b1227
# - added skip if type is 'q' for b1349 and b1350 b1351 b1352 b1353
# - added skip if type is 'Q' for b1447
if ( $starting_ci
&& $rOpts_line_up_parentheses
&& $rOpts_delete_old_whitespace
&& !$rOpts_add_whitespace
&& $rLL->[$Kinner_opening]->[_TYPE_] ne 'q'
&& $rLL->[$Kinner_opening]->[_TYPE_] ne 'Q'
&& defined($Kprev) )
{
my $type_first = $rLL->[$Kfirst]->[_TYPE_];
my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
my $type_prev = $rLL->[$Kprev]->[_TYPE_];
my $type_pp = 'b';
if ( $Kprev >= 0 ) { $type_pp = $rLL->[ $Kprev - 1 ]->[_TYPE_] }
my $is_good_location =
$type_prev =~ /^[\,\.\;]/
|| ( $type_prev =~ /^[=\{\[\(\L]/
&& ( $type_pp eq 'b' || $type_pp eq '}' || $type_first eq 'k' ) )
|| $type_first =~ /^[=\,\.\;\{\[\(\L]/
|| $type_first eq '||'
|| (
$type_first eq 'k'
&& ( $token_first eq 'if'
|| $token_first eq 'or' )
);
if ( !$is_good_location ) {
$msg =
"Skipping weld: poor break with -lp and ci at type_first='$type_first' type_prev='$type_prev' type_pp=$type_pp\n";
$new_weld_ok = 0;
}
}
return ( $new_weld_ok, $maximum_text_length, $starting_lentot, $msg );
} ## end sub setup_new_weld_measurements
sub excess_line_length_for_Krange {
my ( $self, $Kfirst, $Klast ) = @_;
# returns $excess_length =
# by how many characters a line composed of tokens $Kfirst .. $Klast will
# exceed the allowed line length
my $rLL = $self->[_rLL_];
my $length_before_Kfirst =
$Kfirst <= 0
? 0
: $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
# backup before a side comment if necessary
my $Kend = $Klast;
if ( $rOpts_ignore_side_comment_lengths
&& $rLL->[$Klast]->[_TYPE_] eq '#' )
{
my $Kprev = $self->K_previous_nonblank($Klast);
if ( defined($Kprev) && $Kprev >= $Kfirst ) { $Kend = $Kprev }
}
# get the length of the text
my $length = $rLL->[$Kend]->[_CUMULATIVE_LENGTH_] - $length_before_Kfirst;
# get the size of the text window
my $level = $rLL->[$Kfirst]->[_LEVEL_];
my $ci_level = $rLL->[$Kfirst]->[_CI_LEVEL_];
my $max_text_length = $maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
my $excess_length = $length - $max_text_length;
DEBUG_WELD
&& print
"Kfirst=$Kfirst, Klast=$Klast, Kend=$Kend, level=$level, ci=$ci_level, max_text_length=$max_text_length, length=$length\n";
return ($excess_length);
} ## end sub excess_line_length_for_Krange
sub weld_nested_containers {
my ($self) = @_;
# Called once per file for option '--weld-nested-containers'
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
# This routine implements the -wn flag by "welding together"
# the nested closing and opening tokens which were previously
# identified by sub 'find_nested_pairs'. "welding" simply
# involves setting certain hash values which will be checked
# later during formatting.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $rmax_vertical_tightness = $self->[_rmax_vertical_tightness_];
my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
# Find nested pairs of container tokens for any welding.
my $rnested_pairs = $self->find_nested_pairs();
# Return unless there are nested pairs to weld
return unless ( defined($rnested_pairs) && @{$rnested_pairs} );
# NOTE: It would be nice to apply RULE 5 right here by deleting unwanted
# pairs. But it isn't clear if this is possible because we don't know
# which sequences might actually start a weld.
my $rOpts_break_at_old_method_breakpoints =
$rOpts->{'break-at-old-method-breakpoints'};
# This array will hold the sequence numbers of the tokens to be welded.
my @welds;
# Variables needed for estimating line lengths
my $maximum_text_length; # maximum spaces available for text
my $starting_lentot; # cumulative text to start of current line
my $iline_outer_opening = -1;
my $weld_count_this_start = 0;
my $weld_starts_in_block = 0;
# OLD: $single_line_tol added to fix cases b1180 b1181
# = $rOpts_continuation_indentation > $rOpts_indent_columns ? 1 : 0;
# NEW: $single_line_tol=0 fixes b1212; and b1180-1181 work ok now
# =1 for -vmll and -lp; fixes b1452, b1453, b1454
# NOTE: the combination -vmll and -lp can be unstable, especially when
# also combined with -wn. It may eventually be necessary to turn off -vmll
# if -lp is set. For now, this works. The value '1' is a minimum which
# works but can be increased if necessary.
my $single_line_tol =
$rOpts_variable_maximum_line_length && $rOpts_line_up_parentheses
? 1
: 0;
my $multiline_tol = $single_line_tol + 1 +
max( $rOpts_indent_columns, $rOpts_continuation_indentation );
# Define a welding cutoff level: do not start a weld if the inside
# container level equals or exceeds this level.
# We use the minimum of two criteria, either of which may be more
# restrictive. The 'alpha' value is more restrictive in (b1206, b1252) and
# the 'beta' value is more restrictive in other cases (b1243).
# Reduced beta term from beta+3 to beta+2 to fix b1401. Previously:
# my $weld_cutoff_level = min($stress_level_alpha, $stress_level_beta + 2);
# This is now '$high_stress_level'.
# The vertical tightness flags can throw off line length calculations.
# This patch was added to fix instability issue b1284.
# It works to always use a tol of 1 for 1 line block length tests, but
# this restricted value keeps test case wn6.wn working as before.
# It may be necessary to include '[' and '{' here in the future.
my $one_line_tol = $opening_vertical_tightness{'('} ? 1 : 0;
# Abbreviations:
# _oo=outer opening, i.e. first of { {
# _io=inner opening, i.e. second of { {
# _oc=outer closing, i.e. second of } {
# _ic=inner closing, i.e. first of } }
my $previous_pair;
# Main loop over nested pairs...
# We are working from outermost to innermost pairs so that
# level changes will be complete when we arrive at the inner pairs.
while ( @{$rnested_pairs} ) {
my $item = pop @{$rnested_pairs};
my ( $inner_seqno, $outer_seqno ) = @{$item};
my $Kouter_opening = $K_opening_container->{$outer_seqno};
my $Kinner_opening = $K_opening_container->{$inner_seqno};
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my $Kinner_closing = $K_closing_container->{$inner_seqno};
# RULE: do not weld if inner container has <= 3 tokens unless the next
# token is a heredoc (so we know there will be multiple lines)
if ( $Kinner_closing - $Kinner_opening <= 4 ) {
my $Knext_nonblank = $self->K_next_nonblank($Kinner_opening);
next unless defined($Knext_nonblank);
my $type = $rLL->[$Knext_nonblank]->[_TYPE_];
next unless ( $type eq 'h' );
}
my $outer_opening = $rLL->[$Kouter_opening];
my $inner_opening = $rLL->[$Kinner_opening];
my $outer_closing = $rLL->[$Kouter_closing];
my $inner_closing = $rLL->[$Kinner_closing];
# RULE: do not weld to a hash brace. The reason is that it has a very
# strong bond strength to the next token, so a line break after it
# may not work. Previously we allowed welding to something like @{
# but that caused blinking states (cases b751, b779).
if ( $inner_opening->[_TYPE_] eq 'L' ) {
next;
}
# RULE: do not weld to a square bracket which does not contain commas
if ( $inner_opening->[_TYPE_] eq '[' ) {
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$inner_seqno};
next unless ( $rtype_count && $rtype_count->{','} );
# Do not weld if there is text before a '[' such as here:
# curr_opt ( @beg [2,5] )
# It will not break into the desired sandwich structure.
# This fixes case b109, 110.
my $Kdiff = $Kinner_opening - $Kouter_opening;
next if ( $Kdiff > 2 );
next
if ( $Kdiff == 2
&& $rLL->[ $Kouter_opening + 1 ]->[_TYPE_] ne 'b' );
}
# RULE: Avoid welding under stress. The idea is that we need to have a
# little space* within a welded container to avoid instability. Note
# that after each weld the level values are reduced, so long multiple
# welds can still be made. This rule will seldom be a limiting factor
# in actual working code. Fixes b1206, b1243.
my $inner_level = $inner_opening->[_LEVEL_];
if ( $inner_level >= $high_stress_level ) { next }
# extra tolerance added under high stress to fix b1481
my $stress_tol = ( $high_stress_level - $inner_level <= 1 ) ? 1 : 0;
# Set flag saying if this pair starts a new weld
my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
# Set flag saying if this pair is adjacent to the previous nesting pair
# (even if previous pair was rejected as a weld)
my $touch_previous_pair =
defined($previous_pair) && $outer_seqno == $previous_pair->[0];
$previous_pair = $item;
my $do_not_weld_rule = 0;
my $Msg = EMPTY_STRING;
my $is_one_line_weld;
my $iline_oo = $outer_opening->[_LINE_INDEX_];
my $iline_io = $inner_opening->[_LINE_INDEX_];
my $iline_ic = $inner_closing->[_LINE_INDEX_];
my $iline_oc = $outer_closing->[_LINE_INDEX_];
my $token_oo = $outer_opening->[_TOKEN_];
my $token_io = $inner_opening->[_TOKEN_];
# DO-NOT-WELD RULE 7: Do not weld if this conflicts with -bom
# Added for case b973. Moved here from below to fix b1423.
if ( !$do_not_weld_rule
&& $rOpts_break_at_old_method_breakpoints
&& $iline_io > $iline_oo )
{
foreach my $iline ( $iline_oo + 1 .. $iline_io ) {
my $rK_range = $rlines->[$iline]->{_rK_range};
next unless defined($rK_range);
my ( $Kfirst, $Klast_uu ) = @{$rK_range};
next unless defined($Kfirst);
if ( $rLL->[$Kfirst]->[_TYPE_] eq '->' ) {
$do_not_weld_rule = 7;
last;
}
}
}
next if ($do_not_weld_rule);
# Turn off vertical tightness at possible one-line welds. Fixes b1402,
# b1419, b1421, b1424, b1425. This also fixes issues b1338, b1339,
# b1340, b1341, b1342, b1343, which previously used a separate fix.
# Issue c161 is the latest and simplest check, using
# $iline_ic==$iline_io as the test.
if ( %opening_vertical_tightness
&& $iline_ic == $iline_io
&& $opening_vertical_tightness{$token_oo} )
{
$rmax_vertical_tightness->{$outer_seqno} = 0;
}
my $is_multiline_weld =
$iline_oo == $iline_io
&& $iline_ic == $iline_oc
&& $iline_io != $iline_ic;
if (DEBUG_WELD) {
my $len_oo = $rLL->[$Kouter_opening]->[_CUMULATIVE_LENGTH_];
my $len_io = $rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_];
$Msg .= <<EOM;
Pair seqo=$outer_seqno seqi=$inner_seqno lines: loo=$iline_oo lio=$iline_io lic=$iline_ic loc=$iline_oc
Koo=$Kouter_opening Kio=$Kinner_opening Kic=$Kinner_closing Koc=$Kouter_closing lenoo=$len_oo lenio=$len_io
tokens '$token_oo' .. '$token_io'
EOM
}
# DO-NOT-WELD RULE 0:
# Avoid a new paren-paren weld if inner parens are 'sheared' (separated
# by one line). This can produce instabilities (fixes b1250 b1251
# 1256).
if ( !$is_multiline_weld
&& $iline_ic == $iline_io + 1
&& $token_oo eq '('
&& $token_io eq '(' )
{
if (DEBUG_WELD) {
$Msg .= "RULE 0: Not welding due to sheared inner parens\n";
print {*STDOUT} $Msg;
}
next;
}
# If this pair is not adjacent to the previous pair (skipped or not),
# then measure lengths from the start of line of oo.
if (
!$touch_previous_pair
# Also do this if restarting at a new line; fixes case b965, s001
|| ( !$weld_count_this_start && $iline_oo > $iline_outer_opening )
)
{
# Remember the line we are using as a reference
$iline_outer_opening = $iline_oo;
$weld_count_this_start = 0;
$weld_starts_in_block = 0;
( my $new_weld_ok, $maximum_text_length, $starting_lentot, my $msg )
= $self->setup_new_weld_measurements( $Kouter_opening,
$Kinner_opening );
if (
!$new_weld_ok
&& ( $iline_oo != $iline_io
|| $iline_ic != $iline_oc )
)
{
if (DEBUG_WELD) { print {*STDOUT} $msg }
next;
}
my $rK_range = $rlines->[$iline_oo]->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
# An existing one-line weld is a line in which
# (1) the containers are all on one line, and
# (2) the line does not exceed the allowable length
if ( $iline_oo == $iline_oc ) {
# All the tokens are on one line, now check their length.
# Start with the full line index range. We will reduce this
# in the coding below in some cases.
my $Kstart = $Kfirst;
my $Kstop = $Klast;
# Note that the following minimal choice for measuring will
# work and will not cause any instabilities because it is
# invariant:
## my $Kstart = $Kouter_opening;
## my $Kstop = $Kouter_closing;
# But that can lead to some undesirable welds. So a little
# more complicated method has been developed.
# We are trying to avoid creating bad two-line welds when we are
# working on long, previously un-welded input text, such as
# INPUT (example of a long input line weld candidate):
## $mutation->transpos( $self->RNA->position($mutation->label, $atg_label));
# GOOD two-line break: (not welded; result marked too long):
## $mutation->transpos(
## $self->RNA->position($mutation->label, $atg_label));
# BAD two-line break: (welded; result if we weld):
## $mutation->transpos($self->RNA->position(
## $mutation->label, $atg_label));
# We can only get an approximate estimate of the final length,
# since the line breaks may change, and for -lp mode because
# even the indentation is not yet known.
my $level_first = $rLL->[$Kfirst]->[_LEVEL_];
my $level_last = $rLL->[$Klast]->[_LEVEL_];
my $level_oo = $rLL->[$Kouter_opening]->[_LEVEL_];
my $level_oc = $rLL->[$Kouter_closing]->[_LEVEL_];
# - measure to the end of the original line if balanced
# - measure to the closing container if unbalanced (fixes b1230)
#if ( $level_first != $level_last ) { $Kstop = $Kouter_closing }
if ( $level_oc > $level_last ) { $Kstop = $Kouter_closing }
# - measure from the start of the original line if balanced
# - measure from the most previous token with same level
# if unbalanced (b1232)
if ( $Kouter_opening > $Kfirst && $level_oo > $level_first ) {
$Kstart = $Kouter_opening;
foreach
my $KK ( reverse( $Kfirst + 1 .. $Kouter_opening - 1 ) )
{
next if ( $rLL->[$KK]->[_TYPE_] eq 'b' );
last if ( $rLL->[$KK]->[_LEVEL_] < $level_oo );
$Kstart = $KK;
}
}
my $excess =
$self->excess_line_length_for_Krange( $Kstart, $Kstop );
# Coding simplified here for case b1219.
# Increased tol from 0 to 1 when pvt>0 to fix b1284.
$is_one_line_weld = $excess <= $one_line_tol;
}
# DO-NOT-WELD RULE 1:
# Do not weld something that looks like the start of a two-line
# function call, like this: <<snippets/wn6.in>>
# $trans->add_transformation(
# PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
# We will look for a semicolon after the closing paren.
# We want to weld something complex, like this though
# my $compass = uc( opposite_direction( line_to_canvas_direction(
# @{ $coords[0] }, @{ $coords[1] } ) ) );
# Otherwise we will get a 'blinker'. For example, the following
# would become a blinker without this rule:
# $Self->_Add( $SortOrderDisplay{ $Field
# ->GenerateFieldForSelectSQL() } );
# But it is okay to weld a two-line statement if it looks like
# it was already welded, meaning that the two opening containers are
# on a different line that the two closing containers. This is
# necessary to prevent blinking of something like this with
# perltidy -wn -pbp (starting indentation two levels deep):
# $top_label->set_text( gettext(
# "Unable to create personal directory - check permissions.") );
if ( $iline_oc == $iline_oo + 1
&& $iline_io == $iline_ic
&& $token_oo eq '(' )
{
# Look for following semicolon...
my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
my $next_nonblank_type =
defined($Knext_nonblank)
? $rLL->[$Knext_nonblank]->[_TYPE_]
: 'b';
if ( $next_nonblank_type eq ';' ) {
# Then do not weld if no other containers between inner
# opening and closing.
my $Knext_seq_item = $rK_next_seqno_by_K->[$Kinner_opening];
if ( $Knext_seq_item == $Kinner_closing ) {
$do_not_weld_rule = 1;
}
}
}
} ## end starting new weld sequence
else {
# set the 1-line flag if continuing a weld sequence; fixes b1239
$is_one_line_weld = ( $iline_oo == $iline_oc );
}
# DO-NOT-WELD RULE 2:
# Do not weld an opening paren to an inner one line brace block
# We will just use old line numbers for this test and require
# iterations if necessary for convergence
# For example, otherwise we could cause the opening paren
# in the following example to separate from the caller name
# as here:
# $_[0]->code_handler
# ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
# Here is another example where we do not want to weld:
# $wrapped->add_around_modifier(
# sub { push @tracelog => 'around 1'; $_[0]->(); } );
# If the one line sub block gets broken due to length or by the
# user, then we can weld. The result will then be:
# $wrapped->add_around_modifier( sub {
# push @tracelog => 'around 1';
# $_[0]->();
# } );
# Updated to fix cases b1082 b1102 b1106 b1115:
# Also, do not weld to an intact inner block if the outer opening token
# is on a different line. For example, this prevents oscillation
# between these two states in case b1106:
# return map{
# ($_,[$self->$_(@_[1..$#_])])
# }@every;
# return map { (
# $_, [ $self->$_( @_[ 1 .. $#_ ] ) ]
# ) } @every;
# The effect of this change on typical code is very minimal. Sometimes
# it may take a second iteration to converge, but this gives protection
# against blinking.
if ( !$do_not_weld_rule
&& !$is_one_line_weld
&& $iline_ic == $iline_io )
{
$do_not_weld_rule = 2
if ( $token_oo eq '(' || $iline_oo != $iline_io );
}
# DO-NOT-WELD RULE 2A:
# Do not weld an opening asub brace in -lp mode if -asbl is set. This
# helps avoid instabilities in one-line block formation, and fixes
# b1241. Previously, the '$is_one_line_weld' flag was tested here
# instead of -asbl, and this fixed most cases. But it turns out that
# the real problem was the -asbl flag, and switching to this was
# necessary to fixe b1268. This also fixes b1269, b1277, b1278.
if ( !$do_not_weld_rule
&& $rOpts_line_up_parentheses
&& $rOpts_asbl
&& $ris_asub_block->{$outer_seqno} )
{
$do_not_weld_rule = '2A';
}
# DO-NOT-WELD RULE 3:
# Do not weld if this makes our line too long.
# Use a tolerance which depends on if the old tokens were welded
# (fixes cases b746 b748 b749 b750 b752 b753 b754 b755 b756 b758 b759)
if ( !$do_not_weld_rule ) {
# Measure to a little beyond the inner opening token if it is
# followed by a bare word, which may have unusual line break rules.
# NOTE: Originally this was OLD RULE 6: do not weld to a container
# which is followed on the same line by an unknown bareword token.
# This can cause blinkers (cases b626, b611). But OK to weld one
# line welds to fix cases b1057 b1064. For generality, OLD RULE 6
# has been merged into RULE 3 here to also fix cases b1078 b1091.
my $K_for_length = $Kinner_opening;
my $Knext_io = $self->K_next_nonblank($Kinner_opening);
next unless ( defined($Knext_io) ); # shouldn't happen
my $type_io_next = $rLL->[$Knext_io]->[_TYPE_];
# Note: may need to eventually also include other types here,
# such as 'Z' and 'Y': if ($type_io_next =~ /^[ZYw]$/) {
if ( $type_io_next eq 'w' ) {
my $Knext_io2 = $self->K_next_nonblank($Knext_io);
next unless ( defined($Knext_io2) );
my $type_io_next2 = $rLL->[$Knext_io2]->[_TYPE_];
if ( !$type_ok_after_bareword{$type_io_next2} ) {
$K_for_length = $Knext_io2;
}
}
# Use a tolerance for welds over multiple lines to avoid blinkers.
# We can use zero tolerance if it looks like we are working on an
# existing weld.
my $tol =
$is_one_line_weld || $is_multiline_weld
? $single_line_tol
: $multiline_tol;
$tol += $stress_tol;
# By how many characters does this exceed the text window?
my $excess =
$self->cumulative_length_before_K($K_for_length) -
$starting_lentot + 1 + $tol -
$maximum_text_length;
# Old patch: Use '>=0' instead of '> 0' here to fix cases b995 b998
# b1000 b1001 b1007 b1008 b1009 b1010 b1011 b1012 b1016 b1017 b1018
# Revised patch: New tolerance definition allows going back to '> 0'
# here. This fixes case b1124. See also cases b1087 and b1087a.
if ( $excess > 0 ) { $do_not_weld_rule = 3 }
if (DEBUG_WELD) {
$Msg .=
"RULE 3 test: excess length to K=$Kinner_opening is $excess > 0 with tol= $tol ?) \n";
}
}
# DO-NOT-WELD RULE 4; implemented for git#10:
# Do not weld an opening -ce brace if the next container is on a single
# line, different from the opening brace. (This is very rare). For
# example, given the following with -ce, we will avoid joining the {
# and [
# } else {
# [ $_, length($_) ]
# }
# because this would produce a terminal one-line block:
# } else { [ $_, length($_) ] }
# which may not be what is desired. But given this input:
# } else { [ $_, length($_) ] }
# then we will do the weld and retain the one-line block
if ( !$do_not_weld_rule && $rOpts->{'cuddled-else'} ) {
my $block_type = $rblock_type_of_seqno->{$outer_seqno};
if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
my $io_line = $inner_opening->[_LINE_INDEX_];
my $ic_line = $inner_closing->[_LINE_INDEX_];
my $oo_line = $outer_opening->[_LINE_INDEX_];
if ( $oo_line < $io_line && $ic_line == $io_line ) {
$do_not_weld_rule = 4;
}
}
}
# DO-NOT-WELD RULE 5: do not include welds excluded by user
if (
!$do_not_weld_rule
&& %weld_nested_exclusion_rules
&& ( $self->is_excluded_weld( $Kouter_opening, $starting_new_weld )
|| $self->is_excluded_weld( $Kinner_opening, 0 ) )
)
{
$do_not_weld_rule = 5;
}
# DO-NOT-WELD RULE 6: This has been merged into RULE 3 above.
if ($do_not_weld_rule) {
# After neglecting a pair, we start measuring from start of point
# io ... but not if previous type does not like to be separated
# from its container (fixes case b1184)
my $Kprev = $self->K_previous_nonblank($Kinner_opening);
my $type_prev = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'w';
if ( !$has_tight_paren{$type_prev} ) {
my $starting_level = $inner_opening->[_LEVEL_];
my $starting_ci_level = $inner_opening->[_CI_LEVEL_];
$starting_lentot =
$self->cumulative_length_before_K($Kinner_opening);
$maximum_text_length =
$maximum_text_length_at_level[$starting_level] -
$starting_ci_level * $rOpts_continuation_indentation;
}
if (DEBUG_WELD) {
$Msg .= "Not welding due to RULE $do_not_weld_rule\n";
print {*STDOUT} $Msg;
}
# Normally, a broken pair should not decrease indentation of
# intermediate tokens:
## if ( $last_pair_broken ) { next }
# However, for long strings of welded tokens, such as '{{{{{{...'
# we will allow broken pairs to also remove indentation.
# This will keep very long strings of opening and closing
# braces from marching off to the right. We will do this if the
# number of tokens in a weld before the broken weld is 4 or more.
# This rule will mainly be needed for test scripts, since typical
# welds have fewer than about 4 welded tokens.
if ( !@welds || @{ $welds[-1] } < 4 ) { next }
}
# otherwise start new weld ...
elsif ($starting_new_weld) {
$weld_count_this_start++;
if (DEBUG_WELD) {
$Msg .= "Starting new weld\n";
print {*STDOUT} $Msg;
}
push @welds, $item;
my $parent_seqno = $self->parent_seqno_by_K($Kouter_closing);
$weld_starts_in_block = $parent_seqno == SEQ_ROOT
|| $rblock_type_of_seqno->{$parent_seqno};
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
}
# ... or extend current weld
else {
$weld_count_this_start++;
if (DEBUG_WELD) {
$Msg .= "Extending current weld\n";
print {*STDOUT} $Msg;
}
unshift @{ $welds[-1] }, $inner_seqno;
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
# Keep a broken container broken at multiple welds. This might
# also be useful for simple welds, but for now it is restricted
# to multiple welds to minimize changes to existing coding. This
# fixes b1429, b1430. Updated for issue c198: but allow a
# line differences of 1 (simple shear) so that a simple shear
# can remain or become a single line.
if ( $iline_ic - $iline_io > 1 ) {
# Only set this break if it is the last possible weld in this
# chain. This will keep some extreme test cases unchanged.
my $is_chain_end = !@{$rnested_pairs}
|| $rnested_pairs->[-1]->[1] != $inner_seqno;
if ($is_chain_end) {
$self->[_rbreak_container_]->{$inner_seqno} = 1;
}
}
}
# After welding, reduce the indentation level if all intermediate tokens
my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
if ( $dlevel != 0 ) {
my $Kstart = $Kinner_opening;
my $Kstop = $Kinner_closing;
foreach my $KK ( $Kstart .. $Kstop ) {
$rLL->[$KK]->[_LEVEL_] += $dlevel;
}
# Copy opening ci level to help break at = for -lp mode (case b1124)
$rLL->[$Kinner_opening]->[_CI_LEVEL_] =
$rLL->[$Kouter_opening]->[_CI_LEVEL_];
# But only copy the closing ci level if the outer container is
# in a block; otherwise poor results can be produced.
if ($weld_starts_in_block) {
$rLL->[$Kinner_closing]->[_CI_LEVEL_] =
$rLL->[$Kouter_closing]->[_CI_LEVEL_];
}
}
} ## end while ( @{$rnested_pairs})
return;
} ## end sub weld_nested_containers
sub weld_nested_quotes {
my $self = shift;
# Called once per file for option '--weld-nested-containers'. This
# does welding on qw quotes.
# See if quotes are excluded from welding
my $rflags = $weld_nested_exclusion_rules{'q'};
return if ( defined($rflags) && defined( $rflags->[1] ) );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $Num = @{$rLL};
my $rK_weld_left = $self->[_rK_weld_left_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rlines = $self->[_rlines_];
my $starting_lentot;
my $maximum_text_length;
my $is_single_quote = sub {
my ( $Kbeg, $Kend, $quote_type ) = @_;
foreach my $K ( $Kbeg .. $Kend ) {
my $test_type = $rLL->[$K]->[_TYPE_];
next if ( $test_type eq 'b' );
return if ( $test_type ne $quote_type );
}
return 1;
}; ## end $is_single_quote = sub
# Length tolerance - same as previously used for sub weld_nested
my $multiline_tol =
1 + max( $rOpts_indent_columns, $rOpts_continuation_indentation );
# look for single qw quotes nested in containers
foreach my $outer_seqno ( keys %{$K_opening_container} ) {
my $Kouter_opening = $K_opening_container->{$outer_seqno};
# see if the next token is a quote of some type
my $Kn = $Kouter_opening + 1;
next if ( $Kn >= $Num - 1 );
my $next_type = $rLL->[$Kn]->[_TYPE_];
if ( $next_type eq 'b' ) {
$next_type = $rLL->[ ++$Kn ]->[_TYPE_];
}
next if ( $next_type ne 'q' && $next_type ne 'Q' );
my $next_token = $rLL->[$Kn]->[_TOKEN_];
next if ( substr( $next_token, 0, 1 ) ne 'q' );
# The token before the closing container must also be a quote
my $Kouter_closing = $K_closing_container->{$outer_seqno};
my $Kinner_closing = $self->K_previous_nonblank($Kouter_closing);
next unless ( $rLL->[$Kinner_closing]->[_TYPE_] eq $next_type );
# This is an inner opening container
my $Kinner_opening = $Kn;
# Do not weld to single-line quotes. Nothing is gained, and it may
# look bad.
next if ( $Kinner_closing == $Kinner_opening );
# RULE: Avoid welding under stress. This is an alternate b1502 fix.
my $inner_level = $rLL->[$Kinner_opening]->[_LEVEL_];
if ( $inner_level >= $high_stress_level ) { next }
# Only weld to quotes delimited with container tokens. This is
# because welding to arbitrary quote delimiters can produce code
# which is less readable than without welding.
my $closing_delimiter =
substr( $rLL->[$Kinner_closing]->[_TOKEN_], -1, 1 );
next
unless ( $is_closing_token{$closing_delimiter}
|| $closing_delimiter eq '>' );
# Now make sure that there is just a single quote in the container
next
unless (
$is_single_quote->(
$Kinner_opening + 1,
$Kinner_closing - 1,
$next_type
)
);
# OK: This is a candidate for welding
my $Msg = EMPTY_STRING;
my $do_not_weld;
my $iline_oo = $rLL->[$Kouter_opening]->[_LINE_INDEX_];
my $iline_io = $rLL->[$Kinner_opening]->[_LINE_INDEX_];
my $iline_oc = $rLL->[$Kouter_closing]->[_LINE_INDEX_];
my $iline_ic = $rLL->[$Kinner_closing]->[_LINE_INDEX_];
my $is_old_weld = ( $iline_oo == $iline_io && $iline_ic == $iline_oc );
# Fix for case b1189. If quote is marked as type 'Q' then only weld
# if the two closing tokens are on the same input line. Otherwise,
# the closing line will be output earlier in the pipeline than
# other CODE lines and welding will not actually occur. This will
# leave a half-welded structure with potential formatting
# instability. This might be fixed by adding a check for a weld on
# a closing Q token and sending it down the normal channel, but it
# would complicate the code and is potentially risky.
next
if (!$is_old_weld
&& $next_type eq 'Q'
&& $iline_ic != $iline_oc );
# If welded, the line must not exceed allowed line length
( my $ok_to_weld, $maximum_text_length, $starting_lentot, my $msg ) =
$self->setup_new_weld_measurements( $Kouter_opening,
$Kinner_opening );
if ( !$ok_to_weld ) {
if (DEBUG_WELD) { print {*STDOUT} $msg }
next;
}
my $length =
$rLL->[$Kinner_opening]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
my $excess = $length + $multiline_tol - $maximum_text_length;
my $excess_max = ( $is_old_weld ? $multiline_tol : 0 );
if ( $excess >= $excess_max ) {
$do_not_weld = 1;
}
if (DEBUG_WELD) {
if ( !$is_old_weld ) { $is_old_weld = EMPTY_STRING }
$Msg .=
"excess=$excess>=$excess_max, multiline_tol=$multiline_tol, is_old_weld='$is_old_weld'\n";
}
# Check weld exclusion rules for outer container
if ( !$do_not_weld ) {
my $is_leading = !defined( $rK_weld_left->{$Kouter_opening} );
if ( $self->is_excluded_weld( $Kouter_opening, $is_leading ) ) {
if (DEBUG_WELD) {
$Msg .=
"No qw weld due to weld exclusion rules for outer container\n";
}
$do_not_weld = 1;
}
}
# Check the length of the last line (fixes case b1039)
if ( !$do_not_weld ) {
my $rK_range_ic = $rlines->[$iline_ic]->{_rK_range};
my ( $Kfirst_ic, $Klast_ic_uu ) = @{$rK_range_ic};
my $excess_ic =
$self->excess_line_length_for_Krange( $Kfirst_ic,
$Kouter_closing );
# Allow extra space for additional welded closing container(s)
# and a space and comma or semicolon.
# NOTE: weld len has not been computed yet. Use 2 spaces
# for now, correct for a single weld. This estimate could
# be made more accurate if necessary.
my $weld_len = defined( $rK_weld_right->{$Kouter_closing} ) ? 2 : 0;
if ( $excess_ic + $weld_len + 2 > 0 ) {
if (DEBUG_WELD) {
$Msg .=
"No qw weld due to excess ending line length=$excess_ic + $weld_len + 2 > 0\n";
}
$do_not_weld = 1;
}
}
if ($do_not_weld) {
if (DEBUG_WELD) {
$Msg .= "Not Welding QW\n";
print {*STDOUT} $Msg;
}
next;
}
# OK to weld
if (DEBUG_WELD) {
$Msg .= "Welding QW\n";
print {*STDOUT} $Msg;
}
$rK_weld_right->{$Kouter_opening} = $Kinner_opening;
$rK_weld_left->{$Kinner_opening} = $Kouter_opening;
$rK_weld_right->{$Kinner_closing} = $Kouter_closing;
$rK_weld_left->{$Kouter_closing} = $Kinner_closing;
# Undo one indentation level if an extra level was added to this
# multiline quote
my $qw_seqno =
$self->[_rstarting_multiline_qw_seqno_by_K_]->{$Kinner_opening};
if ( $qw_seqno
&& $self->[_rmultiline_qw_has_extra_level_]->{$qw_seqno} )
{
foreach my $K ( $Kinner_opening + 1 .. $Kinner_closing - 1 ) {
$rLL->[$K]->[_LEVEL_] -= 1;
}
$rLL->[$Kinner_opening]->[_CI_LEVEL_] = 0;
$rLL->[$Kinner_closing]->[_CI_LEVEL_] = 0;
}
# undo CI for other welded quotes
else {
foreach my $K ( $Kinner_opening .. $Kinner_closing ) {
$rLL->[$K]->[_CI_LEVEL_] = 0;
}
}
# Change the level of a closing qw token to be that of the outer
# containing token. This will allow -lp indentation to function
# correctly in the vertical aligner.
# Patch to fix c002: but not if it contains text
if ( length( $rLL->[$Kinner_closing]->[_TOKEN_] ) == 1 ) {
$rLL->[$Kinner_closing]->[_LEVEL_] =
$rLL->[$Kouter_closing]->[_LEVEL_];
}
}
return;
} ## end sub weld_nested_quotes
sub is_welded_at_seqno {
my ( $self, $seqno ) = @_;
# Given:
# $seqno = a sequence number:
# Return:
# true if it is welded either left or right
# false otherwise
return unless ( $total_weld_count && defined($seqno) );
my $KK_o = $self->[_K_opening_container_]->{$seqno};
return unless defined($KK_o);
return defined( $self->[_rK_weld_left_]->{$KK_o} )
|| defined( $self->[_rK_weld_right_]->{$KK_o} );
} ## end sub is_welded_at_seqno
sub mark_short_nested_blocks {
my $self = shift;
# This routine looks at the entire file and marks any short nested blocks
# which should not be broken. The results are stored in the hash
# $rshort_nested->{$type_sequence}
# which will be true if the container should remain intact.
#
# For example, consider the following line:
# sub cxt_two { sort { $a <=> $b } test_if_list() }
# The 'sort' block is short and nested within an outer sub block.
# Normally, the existence of the 'sort' block will force the sub block to
# break open, but this is not always desirable. Here we will set a flag for
# the sort block to prevent this. To give the user control, we will
# follow the input file formatting. If either of the blocks is broken in
# the input file then we will allow it to remain broken. Otherwise we will
# set a flag to keep it together in later formatting steps.
# The flag which is set here will be checked in two places:
# 'sub process_line_of_CODE' and 'sub starting_one_line_block'
return if $rOpts->{'indent-only'};
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
return unless ( $rOpts->{'one-line-block-nesting'} );
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rbreak_container = $self->[_rbreak_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rK_sequenced_token_list = $self->[_rK_sequenced_token_list_];
# Variables needed for estimating line lengths
my $maximum_text_length;
my $starting_lentot;
my $length_tol = 1;
my $excess_length_to_K = sub {
my ($K) = @_;
# Estimate the length from the line start to a given token
my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
my $excess_length = $length + $length_tol - $maximum_text_length;
return ($excess_length);
}; ## end $excess_length_to_K = sub
# loop over all containers
my @open_block_stack;
my $iline = -1;
foreach my $KK ( @{$rK_sequenced_token_list} ) {
my $rtoken_vars = $rLL->[$KK];
my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
if ( !$type_sequence ) {
next if ( $KK == 0 ); # first token in file may not be container
# A fault here implies that an error was made in the little loop at
# the bottom of sub 'respace_tokens' which set the values of
# $rK_sequenced_token_list. Or an error has been introduced in the
# loop control lines above.
Fault("sequence = $type_sequence not defined at K=$KK")
if (DEVEL_MODE);
next;
}
# Patch: do not mark short blocks with welds.
# In some cases blinkers can form (case b690).
if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence) ) {
next;
}
# We are just looking at code blocks
my $token = $rtoken_vars->[_TOKEN_];
my $type = $rtoken_vars->[_TYPE_];
next unless ( $type eq $token );
next unless ( $rblock_type_of_seqno->{$type_sequence} );
# Keep a stack of all acceptable block braces seen.
# Only consider blocks entirely on one line so dump the stack when line
# changes.
my $iline_last = $iline;
$iline = $rLL->[$KK]->[_LINE_INDEX_];
if ( $iline != $iline_last ) { @open_block_stack = () }
if ( $token eq '}' ) {
if (@open_block_stack) { pop @open_block_stack }
}
next unless ( $token eq '{' );
# block must be balanced (bad scripts may be unbalanced)
my $K_opening = $K_opening_container->{$type_sequence};
my $K_closing = $K_closing_container->{$type_sequence};
next unless ( defined($K_opening) && defined($K_closing) );
# require that this block be entirely on one line
next
if ( $ris_broken_container->{$type_sequence}
|| $rbreak_container->{$type_sequence} );
# See if this block fits on one line of allowed length (which may
# be different from the input script)
$starting_lentot =
$KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
my $level = $rLL->[$KK]->[_LEVEL_];
my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
$maximum_text_length =
$maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
# Dump the stack if block is too long and skip this block
if ( $excess_length_to_K->($K_closing) > 0 ) {
@open_block_stack = ();
next;
}
# OK, Block passes tests, remember it
push @open_block_stack, $type_sequence;
# We are only marking nested code blocks,
# so check for a previous block on the stack
next if ( @open_block_stack <= 1 );
# Looks OK, mark this as a short nested block
$rshort_nested->{$type_sequence} = 1;
}
return;
} ## end sub mark_short_nested_blocks
sub special_indentation_adjustments {
my ($self) = @_;
# Called once per file to define the levels to be used for computing
# actual indentation. These levels are initialized to be the structural
# levels and then are adjusted if necessary for special purposes.
# The adjustments are made either by changing _CI_LEVEL_ directly or
# by setting modified levels in the array $self->[_radjusted_levels_].
# NOTE: This routine is called after the weld routines, which may have
# already adjusted the initial values of _LEVEL_, so we are making
# adjustments on top of those levels. It would be nicer to have the
# weld routines also use this adjustment, but that gets complicated
# when we combine -gnu -wn and also have some welded quotes.
my $rLL = $self->[_rLL_];
return unless ( @{$rLL} );
# Initialize the adjusted levels to be the structural levels
my @adjusted_levels = map { $_->[_LEVEL_] } @{$rLL};
$self->[_radjusted_levels_] = \@adjusted_levels;
my $min_starting_level = min(@adjusted_levels);
# First set adjusted levels for any non-indenting braces.
$self->do_non_indenting_braces();
# Adjust breaks and indentation list containers
$self->break_before_list_opening_containers();
# Set adjusted levels for the whitespace cycle option.
$self->whitespace_cycle_adjustment();
$self->braces_left_setup();
# Adjust continuation indentation if -bli is set
$self->bli_adjustment();
$self->extended_ci()
if ($rOpts_extended_continuation_indentation);
# Now clip any starting or adjusted levels to be non-negative
$self->clip_adjusted_levels($min_starting_level);
return;
} ## end sub special_indentation_adjustments
sub clip_adjusted_levels {
my ( $self, $min_starting_level ) = @_;
# Replace any negative adjusted levels with zero.
# Negative levels can only occur in files with brace errors.
# Given:
# $min_starting_level = minimum (adjusted) level of the input stream
# Clip the original _LEVEL_ values to zero if necessary
my $rLL = $self->[_rLL_];
if ( $min_starting_level < 0 ) {
foreach my $item ( @{$rLL} ) {
if ( $item->[_LEVEL_] < 0 ) { $item->[_LEVEL_] = 0 }
}
}
# Clip the adjusted levels to zero if necessary
my $radjusted_levels = $self->[_radjusted_levels_];
return unless ( defined($radjusted_levels) && @{$radjusted_levels} );
my $min = min( @{$radjusted_levels} ); # fast check for min
if ( $min < 0 ) {
# slow loop, but rarely needed
foreach ( @{$radjusted_levels} ) { $_ = 0 if ( $_ < 0 ) }
}
return;
} ## end sub clip_adjusted_levels
sub do_non_indenting_braces {
my ($self) = @_;
# Called once per file to handle the --non-indenting-braces parameter.
# Remove indentation within marked braces if requested
# Any non-indenting braces have been found by sub find_non_indenting_braces
# and are defined by the following hash:
my $rseqno_non_indenting_brace_by_ix =
$self->[_rseqno_non_indenting_brace_by_ix_];
return unless ( %{$rseqno_non_indenting_brace_by_ix} );
my $rlines = $self->[_rlines_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rspecial_side_comment_type = $self->[_rspecial_side_comment_type_];
my $radjusted_levels = $self->[_radjusted_levels_];
# First locate all of the marked blocks
my @K_stack;
foreach my $ix ( keys %{$rseqno_non_indenting_brace_by_ix} ) {
my $seqno = $rseqno_non_indenting_brace_by_ix->{$ix};
my $KK = $K_opening_container->{$seqno};
my $line_of_tokens = $rlines->[$ix];
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst_uu, $Klast ) = @{$rK_range};
$rspecial_side_comment_type->{$Klast} = 'NIB';
push @K_stack, [ $KK, 1 ];
my $Kc = $K_closing_container->{$seqno};
push @K_stack, [ $Kc, -1 ] if ( defined($Kc) );
}
return unless (@K_stack);
@K_stack = sort { $a->[0] <=> $b->[0] } @K_stack;
# Then loop to remove indentation within marked blocks
my $KK_last = 0;
my $ndeep = 0;
foreach my $item (@K_stack) {
my ( $KK, $inc ) = @{$item};
if ( $ndeep > 0 ) {
foreach ( $KK_last + 1 .. $KK ) {
$radjusted_levels->[$_] -= $ndeep;
}
# We just subtracted the old $ndeep value, which only applies to a
# '{'. The new $ndeep applies to a '}', so we undo the error.
if ( $inc < 0 ) { $radjusted_levels->[$KK] += 1 }
}
$ndeep += $inc;
$KK_last = $KK;
}
return;
} ## end sub do_non_indenting_braces
sub whitespace_cycle_adjustment {
my $self = shift;
# Called once per file to implement the --whitespace-cycle option
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $radjusted_levels = $self->[_radjusted_levels_];
my $maximum_level = $self->[_maximum_level_];
if ( $rOpts_whitespace_cycle
&& $rOpts_whitespace_cycle > 0
&& $rOpts_whitespace_cycle < $maximum_level )
{
my $Kmax = @{$rLL} - 1;
my $whitespace_last_level = -1;
my @whitespace_level_stack = ();
my $last_nonblank_type = 'b';
my $last_nonblank_token = EMPTY_STRING;
foreach my $KK ( 0 .. $Kmax ) {
my $level_abs = $radjusted_levels->[$KK];
my $level = $level_abs;
if ( $level_abs < $whitespace_last_level ) {
pop(@whitespace_level_stack);
}
if ( !@whitespace_level_stack ) {
push @whitespace_level_stack, $level_abs;
}
else {
if ( $level_abs > $whitespace_last_level ) {
$level = $whitespace_level_stack[-1] +
( $level_abs - $whitespace_last_level );
if (
# 1 Try to break at a block brace
(
$level > $rOpts_whitespace_cycle
&& $last_nonblank_type eq '{'
&& $last_nonblank_token eq '{'
)
# 2 Then either a brace or bracket
|| ( $level > $rOpts_whitespace_cycle + 1
&& $last_nonblank_token =~ /^[\{\[]$/ )
# 3 Then a paren too
|| $level > $rOpts_whitespace_cycle + 2
)
{
$level = 1;
}
push @whitespace_level_stack, $level;
}
}
$level = $whitespace_level_stack[-1];
$radjusted_levels->[$KK] = $level;
$whitespace_last_level = $level_abs;
my $type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
}
}
return;
} ## end sub whitespace_cycle_adjustment
use constant DEBUG_BBX => 0;
sub break_before_list_opening_containers {
my ($self) = @_;
# This routine is called once per batch to implement parameters:
# --break-before-hash-brace=n and similar -bbx=n flags
# and their associated indentation flags:
# --break-before-hash-brace-and-indent and similar -bbxi=n
# Nothing to do if none of the -bbx=n parameters has been set
return unless %break_before_container_types;
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $ris_broken_container = $self->[_ris_broken_container_];
my $ris_permanently_broken = $self->[_ris_permanently_broken_];
my $rhas_list = $self->[_rhas_list_];
my $rhas_broken_list_with_lec = $self->[_rhas_broken_list_with_lec_];
my $radjusted_levels = $self->[_radjusted_levels_];
my $rparent_of_seqno = $self->[_rparent_of_seqno_];
my $rlines = $self->[_rlines_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $rlec_count_by_seqno = $self->[_rlec_count_by_seqno_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $rK_weld_right = $self->[_rK_weld_right_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $length_tol =
max( 1, $rOpts_continuation_indentation, $rOpts_indent_columns );
if ($rOpts_ignore_old_breakpoints) {
# Patch suggested by b1231; the old tol was excessive.
## $length_tol += $rOpts_maximum_line_length;
$length_tol *= 2;
}
#-------------------------------------------------------
# These arrays are used to mark the affected containers:
#-------------------------------------------------------
my $rbreak_before_container_by_seqno = {};
my $rwant_reduced_ci = {};
#------------------------------
# Main loop over all containers
#------------------------------
foreach my $seqno ( keys %{$K_opening_container} ) {
#----------------------------------------------------------------
# Part 1: Examine any -bbx=n flags
#----------------------------------------------------------------
next if ( $rblock_type_of_seqno->{$seqno} );
my $KK = $K_opening_container->{$seqno};
# This must be a list or contain a list.
# Note1: switched from 'has_broken_list' to 'has_list' to fix b1024.
# Note2: 'has_list' holds the depth to the sub-list. We will require
# a depth of just 1
my $is_list = $self->is_list_by_seqno($seqno);
my $has_list = $rhas_list->{$seqno};
# Fix for b1173: if welded opening container, use flag of innermost
# seqno. Otherwise, the restriction $has_list==1 prevents triple and
# higher welds from following the -BBX parameters.
if ($total_weld_count) {
my $KK_test = $rK_weld_right->{$KK};
if ( defined($KK_test) ) {
my $seqno_inner = $rLL->[$KK_test]->[_TYPE_SEQUENCE_];
$is_list ||= $self->is_list_by_seqno($seqno_inner);
$has_list = $rhas_list->{$seqno_inner};
}
}
next unless ( $is_list || $has_list && $has_list == 1 );
my $has_list_with_lec = $rhas_broken_list_with_lec->{$seqno};
# Only for types of container tokens with a non-default break option
my $token = $rLL->[$KK]->[_TOKEN_];
my $break_option = $break_before_container_types{$token};
next unless ($break_option);
# Do not use -bbx under stress for stability ... fixes b1300
# NOTE: Testing in v20240501 showed that this check is no longer
# needed for stability, but there is little point in removing it.
my $level = $rLL->[$KK]->[_LEVEL_];
if ( $level >= $stress_level_beta ) {
DEBUG_BBX
&& print
"BBX: Switching off at $seqno: level=$level exceeds beta stress level=$stress_level_beta\n";
next;
}
# Require previous nonblank to be '=' or '=>'
my $Kprev = $KK - 1;
next if ( $Kprev < 0 );
my $prev_type = $rLL->[$Kprev]->[_TYPE_];
if ( $prev_type eq 'b' ) {
$Kprev--;
next if ( $Kprev < 0 );
$prev_type = $rLL->[$Kprev]->[_TYPE_];
}
next unless ( $is_equal_or_fat_comma{$prev_type} );
my $ci = $rLL->[$KK]->[_CI_LEVEL_];
#--------------------------------------------
# New coding for option 2 (break if complex).
#--------------------------------------------
# This new coding uses clues which are invariant under formatting to
# decide if a list is complex. For now it is only applied when -lp
# and -vmll are used, but eventually it may become the standard method.
# Fixes b1274, b1275, and others, including b1099.
# Update: case b1469 also had this type of problem; it had the
# combination ci>i and used -xci. This is just a band-aid; eventually
# it might be best if all cases use this logic, but that would change
# existing formatting.
if ( $break_option == 2 ) {
my $b1469 = $rOpts_continuation_indentation > $rOpts_indent_columns
&& $rOpts_extended_continuation_indentation;
if ( $rOpts_line_up_parentheses
|| $rOpts_variable_maximum_line_length
|| $b1469 )
{
# Start with the basic definition of a complex list...
my $is_complex = $is_list && $has_list;
# and it is also complex if the parent is a list
if ( !$is_complex ) {
my $parent = $rparent_of_seqno->{$seqno};
if ( $self->is_list_by_seqno($parent) ) {
$is_complex = 1;
}
}
# finally, we will call it complex if there are inner opening
# and closing container tokens, not parens, within the outer
# container tokens.
if ( !$is_complex ) {
my $Kp = $self->K_next_nonblank($KK);
my $token_p = defined($Kp) ? $rLL->[$Kp]->[_TOKEN_] : 'b';
if ( $is_opening_token{$token_p} && $token_p ne '(' ) {
my $Kc = $K_closing_container->{$seqno};
my $Km = $self->K_previous_nonblank($Kc);
my $token_m = 'b';
my $type_m = SPACE;
if ( defined($Km) ) {
$token_m = $rLL->[$Km]->[_TOKEN_];
$type_m = $rLL->[$Km]->[_TYPE_];
}
# ignore any optional ending comma
if ( $type_m eq ',' ) {
$Km = $self->K_previous_nonblank($Km);
$token_m =
defined($Km) ? $rLL->[$Km]->[_TOKEN_] : 'b';
}
$is_complex ||=
$is_closing_token{$token_m} && $token_m ne ')';
}
}
# Convert to option 3 (always break) if complex
next unless ($is_complex);
$break_option = 3;
}
}
# Fix for b1231: the has_list_with_lec does not cover all cases.
# A broken container containing a list and with line-ending commas
# will stay broken, so can be treated as if it had a list with lec.
$has_list_with_lec ||=
$has_list
&& $ris_broken_container->{$seqno}
&& $rlec_count_by_seqno->{$seqno};
DEBUG_BBX
&& print {*STDOUT}
"BBX: Looking at seqno=$seqno, token = $token with option=$break_option\n";
# -bbx=1 = stable, try to follow input
if ( $break_option == 1 ) {
my $iline = $rLL->[$KK]->[_LINE_INDEX_];
my $rK_range = $rlines->[$iline]->{_rK_range};
my ( $Kfirst, $Klast_uu ) = @{$rK_range};
next unless ( $KK == $Kfirst );
}
# -bbx=2 => apply this style only for a 'complex' list
elsif ( $break_option == 2 ) {
# break if this list contains a broken list with line-ending comma
my $ok_to_break;
my $Msg = EMPTY_STRING;
if ($has_list_with_lec) {
$ok_to_break = 1;
DEBUG_BBX && do { $Msg = "has list with lec;" };
}
if ( !$ok_to_break ) {
# Turn off -xci if -bbx=2 and this container has a sublist but
# not a broken sublist. This avoids creating blinkers. The
# problem is that -xci can cause one-line lists to break open,
# and thereby creating formatting instability.
# This fixes cases b1033 b1036 b1037 b1038 b1042 b1043 b1044
# b1045 b1046 b1047 b1051 b1052 b1061.
if ($has_list) { $rno_xci_by_seqno->{$seqno} = 1 }
my $parent = $rparent_of_seqno->{$seqno};
if ( $self->is_list_by_seqno($parent) ) {
DEBUG_BBX && do { $Msg = "parent is list" };
$ok_to_break = 1;
}
}
if ( !$ok_to_break ) {
DEBUG_BBX
&& print {*STDOUT} "Not breaking at seqno=$seqno: $Msg\n";
next;
}
DEBUG_BBX
&& print {*STDOUT} "OK to break at seqno=$seqno: $Msg\n";
# Patch: turn off -xci if -bbx=2 and -lp
# This fixes cases b1090 b1095 b1101 b1116 b1118 b1121 b1122
$rno_xci_by_seqno->{$seqno} = 1 if ($rOpts_line_up_parentheses);
}
# -bbx=3 = always break
elsif ( $break_option == 3 ) {
# ok to break
}
# Bad flag, this shouldn't happen because of the integer range checks.
# Continue using behavior same as option 3 if not in DEVEL_MODE
else {
DEVEL_MODE && Fault(<<EOM);
Bad -bbx break option=$break_option for '$token': fix integer range checks.
EOM
}
# Set a flag for actual implementation later in
# sub insert_breaks_before_list_opening_containers
$rbreak_before_container_by_seqno->{$seqno} = 1;
DEBUG_BBX
&& print {*STDOUT} "BBX: ok to break at seqno=$seqno\n";
# -bbxi=0: Nothing more to do if the ci value remains unchanged
my $ci_flag = $container_indentation_options{$token};
next unless ($ci_flag);
# -bbxi=1: This option removes ci and is handled in
# later sub get_final_indentation
if ( $ci_flag == 1 ) {
$rwant_reduced_ci->{$seqno} = 1;
next;
}
# -bbxi=2: This option changes the level ...
# This option can conflict with -xci in some cases. We can turn off
# -xci for this container to avoid blinking. For now, only do this if
# -vmll is set. ( fixes b1335, b1336 )
if ($rOpts_variable_maximum_line_length) {
$rno_xci_by_seqno->{$seqno} = 1;
}
#----------------------------------------------------------------
# Part 2: Perform tests before committing to changing ci and level
#----------------------------------------------------------------
# Before changing the ci level of the opening container, we need
# to be sure that the container will be broken in the later stages of
# formatting. We have to do this because we are working early in the
# formatting pipeline. A problem can occur if we change the ci or
# level of the opening token but do not actually break the container
# open as expected. In most cases it wouldn't make any difference if
# we changed ci or not, but there are some edge cases where this
# can cause blinking states, so we need to try to only change ci if
# the container will really be broken.
# Only consider containers already broken
next if ( !$ris_broken_container->{$seqno} );
# Patch to fix issue b1305: the combination of -naws and ci>i appears
# to cause an instability. It should almost never occur in practice.
next
if (!$rOpts_add_whitespace
&& $rOpts_continuation_indentation > $rOpts_indent_columns );
# Always ok to change ci for permanently broken containers
if ( $ris_permanently_broken->{$seqno} ) { }
# Always OK if this list contains a broken sub-container with
# a non-terminal line-ending comma
elsif ($has_list_with_lec) { }
# Otherwise, we are considering a single container...
else {
# A single container must have at least 1 line-ending comma:
next unless ( $rlec_count_by_seqno->{$seqno} );
my $OK;
# Since it has a line-ending comma, it will stay broken if the
# -boc flag is set
if ($rOpts_break_at_old_comma_breakpoints) { $OK = 1 }
# OK if the container contains multiple fat commas
# Better: multiple lines with fat commas
if ( !$OK && !$rOpts_ignore_old_breakpoints ) {
my $rtype_count = $rtype_count_by_seqno->{$seqno};
next unless ($rtype_count);
my $fat_comma_count = $rtype_count->{'=>'};
DEBUG_BBX
&& print {*STDOUT} "BBX: fat comma count=$fat_comma_count\n";
if ( $fat_comma_count && $fat_comma_count >= 2 ) { $OK = 1 }
}
# The last check we can make is to see if this container could
# fit on a single line. Use the least possible indentation
# estimate, ci=0, so we are not subtracting $ci *
# $rOpts_continuation_indentation from tabulated
# $maximum_text_length value.
if ( !$OK ) {
my $maximum_text_length = $maximum_text_length_at_level[$level];
my $K_closing = $K_closing_container->{$seqno};
my $length = $self->cumulative_length_before_K($K_closing) -
$self->cumulative_length_before_K($KK);
my $excess_length = $length - $maximum_text_length;
DEBUG_BBX
&& print {*STDOUT}
"BBX: excess=$excess_length: maximum_text_length=$maximum_text_length, length=$length, ci=$ci\n";
# OK if the net container definitely breaks on length
if ( $excess_length > $length_tol ) {
$OK = 1;
DEBUG_BBX
&& print {*STDOUT} "BBX: excess_length=$excess_length\n";
}
# Otherwise skip it
else { next }
}
}
#------------------------------------------------------------
# Part 3: Looks OK: apply -bbx=n and any related -bbxi=n flag
#------------------------------------------------------------
DEBUG_BBX && print {*STDOUT} "BBX: OK to break\n";
# -bbhbi=n
# -bbsbi=n
# -bbpi=n
# where:
# n=0 default indentation (usually one ci)
# n=1 outdent one ci
# n=2 indent one level (minus one ci)
# NOTE: We are adjusting indentation of the opening container. The
# closing container will normally follow the indentation of the opening
# container automatically, so this is not currently done.
next unless ($ci);
# option 1: outdent
if ( $ci_flag == 1 ) {
$ci -= 1;
}
# option 2: indent one level
elsif ( $ci_flag == 2 ) {
$ci -= 1;
$radjusted_levels->[$KK] += 1;
}
# unknown option
else {
# Shouldn't happen - leave ci unchanged
DEVEL_MODE && Fault(<<EOM);
unexpected ci flag '$ci_flag' for -bbpi -bbsbi -bbhbi: expecting one of 0 1 2
EOM
}
$rLL->[$KK]->[_CI_LEVEL_] = $ci if ( $ci >= 0 );
}
#------------------
# Store the results
#------------------
$self->[_rbreak_before_container_by_seqno_] =
$rbreak_before_container_by_seqno;
$self->[_rwant_reduced_ci_] = $rwant_reduced_ci;
return;
} ## end sub break_before_list_opening_containers
use constant DEBUG_XCI => 0;
sub extended_ci {
my ($self) = @_;
# This routine implements the -xci (--extended-continuation-indentation)
# flag. We add CI to interior tokens of a container which itself has CI but
# only if a token does not already have CI.
# To do this, we will locate opening tokens which themselves have
# continuation indentation (CI). We track them with their sequence
# numbers. These sequence numbers are called 'controlling sequence
# numbers'. They apply continuation indentation to the tokens that they
# contain. These inner tokens remember their controlling sequence numbers.
# Later, when these inner tokens are output, they have to see if the output
# lines with their controlling tokens were output with CI or not. If not,
# then they must remove their CI too.
# The controlling CI concept works hierarchically. But CI itself is not
# hierarchical; it is either on or off. There are some rare instances where
# it would be best to have hierarchical CI too, but not enough to be worth
# the programming effort.
# The operations to remove unwanted CI are done in sub 'undo_ci'.
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $ris_seqno_controlling_ci = $self->[_ris_seqno_controlling_ci_];
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
my $rno_xci_by_seqno = $self->[_rno_xci_by_seqno_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my %available_space;
# Loop over all opening container tokens
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
my $rK_sequenced_token_list = $self->[_rK_sequenced_token_list_];
my @seqno_stack;
my $seqno_top;
my $K_last;
# The following variable can be used to allow a little extra space to
# avoid blinkers. A value $len_tol = 20 fixed the following
# fixes cases: b1025 b1026 b1027 b1028 b1029 b1030 but NOT b1031.
# It turned out that the real problem was mis-parsing a list brace as
# a code block in a 'use' statement when the line length was extremely
# small. A value of 0 works now, but a slightly larger value can
# be used to minimize the chance of a blinker.
my $len_tol = 0;
foreach my $KK ( @{$rK_sequenced_token_list} ) {
# Fix all tokens up to the next sequence item if we are changing CI
if ($seqno_top) {
my $is_list = $ris_list_by_seqno->{$seqno_top};
my $space = $available_space{$seqno_top};
my $count = 0;
foreach my $Kt ( $K_last + 1 .. $KK - 1 ) {
next if ( $rLL->[$Kt]->[_CI_LEVEL_] );
# But do not include tokens which might exceed the line length
# and are not in a list.
# ... This fixes case b1031
if ( $is_list
|| $rLL->[$Kt]->[_TOKEN_LENGTH_] < $space
|| $rLL->[$Kt]->[_TYPE_] eq '#' )
{
$rLL->[$Kt]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$Kt} = $seqno_top;
$count++;
}
}
$ris_seqno_controlling_ci->{$seqno_top} += $count;
}
$K_last = $KK;
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
# see if we have reached the end of the current controlling container
if ( $seqno_top && $seqno == $seqno_top ) {
$seqno_top = pop @seqno_stack;
}
# Patch to fix some block types...
# Certain block types arrive from the tokenizer without CI but should
# have it for this option. These include anonymous subs and
# do sort map grep eval
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type && $is_block_with_ci{$block_type} ) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
if ($seqno_top) {
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
}
# If this does not have ci, update ci if necessary and continue looking
else {
if ( !$rLL->[$KK]->[_CI_LEVEL_] ) {
if ($seqno_top) {
$rLL->[$KK]->[_CI_LEVEL_] = 1;
$rseqno_controlling_my_ci->{$KK} = $seqno_top;
$ris_seqno_controlling_ci->{$seqno_top}++;
}
next;
}
}
# We are looking for opening container tokens with ci
my $K_opening = $K_opening_container->{$seqno};
next unless ( defined($K_opening) && $KK == $K_opening );
# Make sure there is a corresponding closing container
# (could be missing if the script has a brace error)
my $K_closing = $K_closing_container->{$seqno};
next unless defined($K_closing);
# Skip if requested by -bbx to avoid blinkers
next if ( $rno_xci_by_seqno->{$seqno} );
# Skip if this is a -bli container (this fixes case b1065) Note: case
# b1065 is also fixed by the update for b1055, so this update is not
# essential now. But there does not seem to be a good reason to add
# xci and bli together, so the update is retained.
next if ( $ris_bli_container->{$seqno} );
# Require different input lines. This will filter out a large number
# of small hash braces and array brackets. If we accidentally filter
# out an important container, it will get fixed on the next pass.
if (
$rLL->[$K_opening]->[_LINE_INDEX_] ==
$rLL->[$K_closing]->[_LINE_INDEX_]
&& ( $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
$rLL->[$K_opening]->[_CUMULATIVE_LENGTH_] >
$rOpts_maximum_line_length )
)
{
DEBUG_XCI
&& print "XCI: Skipping seqno=$seqno, require different lines\n";
next;
}
# Do not apply -xci if adding extra ci will put the container contents
# beyond the line length limit (fixes cases b899 b935)
my $level = $rLL->[$K_opening]->[_LEVEL_];
my $ci_level = $rLL->[$K_opening]->[_CI_LEVEL_];
my $maximum_text_length =
$maximum_text_length_at_level[$level] -
$ci_level * $rOpts_continuation_indentation;
# Fix for b1197 b1198 b1199 b1200 b1201 b1202
# Do not apply -xci if we are running out of space
# NOTE: Testing in v20240501 showed that this check is no longer
# needed for stability, but there is little point in removing it.
if ( $level >= $stress_level_beta ) {
DEBUG_XCI
&& print
"XCI: Skipping seqno=$seqno, level=$level exceeds stress level=$stress_level_beta\n";
next;
}
# remember how much space is available for patch b1031 above
my $space =
$maximum_text_length - $len_tol - $rOpts_continuation_indentation;
if ( $space < 0 ) {
DEBUG_XCI && print "XCI: Skipping seqno=$seqno, space=$space\n";
next;
}
DEBUG_XCI && print "XCI: OK seqno=$seqno, space=$space\n";
$available_space{$seqno} = $space;
# This becomes the next controlling container
push @seqno_stack, $seqno_top if ($seqno_top);
$seqno_top = $seqno;
}
return;
} ## end sub extended_ci
sub braces_left_setup {
# Called once per file to mark all -bl, -sbl, and -asbl containers
my $self = shift;
my $rOpts_bl = $rOpts->{'opening-brace-on-new-line'};
my $rOpts_sbl = $rOpts->{'opening-sub-brace-on-new-line'};
my $rOpts_asbl = $rOpts->{'opening-anonymous-sub-brace-on-new-line'};
return unless ( $rOpts_bl || $rOpts_sbl || $rOpts_asbl );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
# We will turn on this hash for braces controlled by these flags:
my $rbrace_left = $self->[_rbrace_left_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_asub_block = $self->[_ris_asub_block_];
my $ris_sub_block = $self->[_ris_sub_block_];
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $block_type = $rblock_type_of_seqno->{$seqno};
# use -asbl flag for an anonymous sub block
if ( $ris_asub_block->{$seqno} ) {
if ($rOpts_asbl) {
$rbrace_left->{$seqno} = 1;
}
}
# use -sbl flag for a named sub
elsif ( $ris_sub_block->{$seqno} ) {
if ($rOpts_sbl) {
$rbrace_left->{$seqno} = 1;
}
}
# use -bl flag if not a sub block of any type
else {
if ( $rOpts_bl
&& $block_type =~ /$bl_pattern/
&& $block_type !~ /$bl_exclusion_pattern/ )
{
$rbrace_left->{$seqno} = 1;
}
}
}
return;
} ## end sub braces_left_setup
sub bli_adjustment {
# Called once per file to implement the --brace-left-and-indent option.
# If -bli is set, adds one continuation indentation for certain braces
my $self = shift;
return unless ( $rOpts->{'brace-left-and-indent'} );
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $ris_bli_container = $self->[_ris_bli_container_];
my $rbrace_left = $self->[_rbrace_left_];
my $K_opening_container = $self->[_K_opening_container_];
my $K_closing_container = $self->[_K_closing_container_];
foreach my $seqno ( keys %{$rblock_type_of_seqno} ) {
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type
&& $block_type =~ /$bli_pattern/
&& $block_type !~ /$bli_exclusion_pattern/ )
{
$ris_bli_container->{$seqno} = 1;
$rbrace_left->{$seqno} = 1;
my $Ko = $K_opening_container->{$seqno};
my $Kc = $K_closing_container->{$seqno};
if ( defined($Ko) && defined($Kc) ) {
$rLL->[$Kc]->[_CI_LEVEL_] = ++$rLL->[$Ko]->[_CI_LEVEL_];
}
}
}
return;
} ## end sub bli_adjustment
sub find_multiline_qw {
my ( $self, $rqw_lines ) = @_;
# Multiline qw quotes are not sequenced items like containers { [ (
# but behave in some respects in a similar way. So this routine finds them
# and creates a separate sequence number system for later use.
# This is straightforward because they always begin at the end of one line
# and end at the beginning of a later line. This is true no matter how we
# finally make our line breaks, so we can find them before deciding on new
# line breaks.
# Input parameter:
# if $rqw_lines is defined it is a ref to array of all line index numbers
# for which there is a type 'q' qw quote at either end of the line. This
# was defined by sub resync_lines_and_tokens for efficiency.
#
my $rlines = $self->[_rlines_];
# if $rqw_lines is not defined (this will occur with -io option) then we
# will have to scan all lines.
if ( !defined($rqw_lines) ) {
$rqw_lines = [ 0 .. @{$rlines} - 1 ];
}
# if $rqw_lines is defined but empty, just return because there are no
# multiline qw's
else {
if ( !@{$rqw_lines} ) { return }
}
my $rstarting_multiline_qw_seqno_by_K = {};
my $rending_multiline_qw_seqno_by_K = {};
my $rKrange_multiline_qw_by_seqno = {};
my $rmultiline_qw_has_extra_level = {};
my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
my $rLL = $self->[_rLL_];
my $qw_seqno;
my $num_qw_seqno = 0;
my $K_start_multiline_qw;
# For reference, here is the old loop, before $rqw_lines became available:
## foreach my $line_of_tokens ( @{$rlines} ) {
foreach my $iline ( @{$rqw_lines} ) {
my $line_of_tokens = $rlines->[$iline];
# Note that these first checks are required in case we have to scan
# all lines, not just lines with type 'q' at the ends.
my $line_type = $line_of_tokens->{_line_type};
next unless ( $line_type eq 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
next unless ( defined($Kfirst) && defined($Klast) ); # skip blank line
# Continuing a sequence of qw lines ...
if ( defined($K_start_multiline_qw) ) {
my $type = $rLL->[$Kfirst]->[_TYPE_];
# shouldn't happen
if ( $type ne 'q' ) {
DEVEL_MODE && print {*STDERR} <<EOM;
STRANGE: started multiline qw at K=$K_start_multiline_qw but didn't see q qw at K=$Kfirst\n";
EOM
$K_start_multiline_qw = undef;
next;
}
my $Kprev = $self->K_previous_nonblank($Kfirst);
my $Knext = $self->K_next_nonblank($Kfirst);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m eq 'q' && $type_p ne 'q' ) {
$rending_multiline_qw_seqno_by_K->{$Kfirst} = $qw_seqno;
$rKrange_multiline_qw_by_seqno->{$qw_seqno} =
[ $K_start_multiline_qw, $Kfirst ];
$K_start_multiline_qw = undef;
$qw_seqno = undef;
}
}
# Starting a new a sequence of qw lines ?
if ( !defined($K_start_multiline_qw)
&& $rLL->[$Klast]->[_TYPE_] eq 'q' )
{
my $Kprev = $self->K_previous_nonblank($Klast);
my $Knext = $self->K_next_nonblank($Klast);
my $type_m = defined($Kprev) ? $rLL->[$Kprev]->[_TYPE_] : 'b';
my $type_p = defined($Knext) ? $rLL->[$Knext]->[_TYPE_] : 'b';
if ( $type_m ne 'q' && $type_p eq 'q' ) {
$num_qw_seqno++;
$qw_seqno = 'q' . $num_qw_seqno;
$K_start_multiline_qw = $Klast;
$rstarting_multiline_qw_seqno_by_K->{$Klast} = $qw_seqno;
}
}
}
# Give multiline qw lists extra indentation instead of CI. This option
# works well but is currently only activated when the -xci flag is set.
# The reason is to avoid unexpected changes in formatting.
if ($rOpts_extended_continuation_indentation) {
foreach my $qw_seqno_x ( keys %{$rKrange_multiline_qw_by_seqno} ) {
my $rKrange = $rKrange_multiline_qw_by_seqno->{$qw_seqno_x};
my ( $Kbeg, $Kend ) = @{$rKrange};
# require isolated closing token
my $token_end = $rLL->[$Kend]->[_TOKEN_];
my $is_isolated_closing = length($token_end) == 1
&& ( $is_closing_token{$token_end} || $token_end eq '>' );
next unless ($is_isolated_closing);
# require isolated opening token
my $token_beg = $rLL->[$Kbeg]->[_TOKEN_];
# allow space(s) after the qw
if ( length($token_beg) > 3 && substr( $token_beg, 2, 1 ) =~ m/\s/ )
{
$token_beg =~ s/\s+//;
}
next unless ( length($token_beg) == 3 );
foreach my $KK ( $Kbeg + 1 .. $Kend - 1 ) {
$rLL->[$KK]->[_LEVEL_]++;
$rLL->[$KK]->[_CI_LEVEL_] = 0;
}
# set flag for -wn option, which will remove the level
$rmultiline_qw_has_extra_level->{$qw_seqno_x} = 1;
}
}
# For the -lp option we need to mark all parent containers of
# multiline quotes
if ( $rOpts_line_up_parentheses && !$rOpts_extended_line_up_parentheses ) {
foreach my $qw_seqno_x ( keys %{$rKrange_multiline_qw_by_seqno} ) {
my $rKrange = $rKrange_multiline_qw_by_seqno->{$qw_seqno_x};
my ( $Kbeg, $Kend ) = @{$rKrange};
my $parent_seqno = $self->parent_seqno_by_K($Kend);
next unless ($parent_seqno);
# If the parent container exactly surrounds this qw, then -lp
# formatting seems to work so we will not mark it.
my $is_tightly_contained;
my $Kn = $self->K_next_nonblank($Kend);
my $seqno_n = defined($Kn) ? $rLL->[$Kn]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_n) && $seqno_n eq $parent_seqno ) {
my $Kp = $self->K_previous_nonblank($Kbeg);
my $seqno_p =
defined($Kp) ? $rLL->[$Kp]->[_TYPE_SEQUENCE_] : undef;
if ( defined($seqno_p) && $seqno_p eq $parent_seqno ) {
$is_tightly_contained = 1;
}
}
$ris_excluded_lp_container->{$parent_seqno} = 1
unless ($is_tightly_contained);
# continue up the tree marking parent containers
$self->mark_parent_containers( $parent_seqno,
$ris_excluded_lp_container );
}
}
$self->[_rstarting_multiline_qw_seqno_by_K_] =
$rstarting_multiline_qw_seqno_by_K;
$self->[_rending_multiline_qw_seqno_by_K_] =
$rending_multiline_qw_seqno_by_K;
$self->[_rKrange_multiline_qw_by_seqno_] = $rKrange_multiline_qw_by_seqno;
$self->[_rmultiline_qw_has_extra_level_] = $rmultiline_qw_has_extra_level;
return;
} ## end sub find_multiline_qw
use constant DEBUG_COLLAPSED_LENGTHS => 0;
# Minimum space reserved for contents of a code block. A value of 40 has given
# reasonable results. With a large line length, say -l=120, this will not
# normally be noticeable but it will prevent making a mess in some edge cases.
use constant MIN_BLOCK_LEN => 40;
my %is_handle_type;
BEGIN {
my @q = qw( w C U G i k => );
@is_handle_type{@q} = (1) x scalar(@q);
my $i = 0;
use constant {
_max_prong_len_ => $i++,
_handle_len_ => $i++,
_seqno_o_ => $i++,
_iline_o_ => $i++,
_K_o_ => $i++,
_K_c_ => $i++,
_interrupted_list_rule_ => $i++,
};
} ## end BEGIN
sub is_fragile_block_type {
my ( $self, $block_type, $seqno ) = @_;
# Given:
# $block_type = the block type of a token, and
# $seqno = its sequence number
# Return:
# true if this block type stays broken after being broken,
# false otherwise
# This sub has been added to isolate a tricky decision needed
# to fix issue b1428.
# The coding here needs to agree with:
# - sub process_line where variable '$rbrace_follower' is set
# - sub process_line_inner_loop where variable '$is_opening_BLOCK' is set,
if ( $is_sort_map_grep_eval{$block_type}
|| $block_type eq 't'
|| $self->[_rshort_nested_]->{$seqno} )
{
return 0;
}
return 1;
} ## end sub is_fragile_block_type
{ ## closure xlp_collapsed_lengths
my $max_prong_len;
my $len;
my $last_nonblank_type;
my @stack;
sub xlp_collapsed_lengths_initialize {
$max_prong_len = 0;
$len = 0;
$last_nonblank_type = 'b';
@stack = ();
push @stack, [
0, # $max_prong_len,
0, # $handle_len,
SEQ_ROOT, # $seqno,
undef, # $iline,
undef, # $KK,
undef, # $K_c,
undef, # $interrupted_list_rule
];
return;
} ## end sub xlp_collapsed_lengths_initialize
sub cumulative_length_to_comma {
my ( $self, $KK, $K_comma, $K_closing ) = @_;
# Given:
# $KK = index of starting token, or blank before start
# $K_comma = index of line-ending comma
# $K_closing = index of the container closing token
# Return:
# $length = cumulative length of the term
my $rLL = $self->[_rLL_];
if ( $rLL->[$KK]->[_TYPE_] eq 'b' ) { $KK++ }
my $length = 0;
if (
$KK < $K_comma
&& $rLL->[$K_comma]->[_TYPE_] eq ',' # should be true
# Ignore if terminal comma, causes instability (b1297,
# b1330)
&& (
$K_closing - $K_comma > 2
|| ( $K_closing - $K_comma == 2
&& $rLL->[ $K_comma + 1 ]->[_TYPE_] ne 'b' )
)
# The comma should be in this container
&& ( $rLL->[$K_comma]->[_LEVEL_] - 1 ==
$rLL->[$K_closing]->[_LEVEL_] )
)
{
# An additional check: if line ends in ), and the ) has vtc then
# skip this estimate. Otherwise, vtc can give oscillating results.
# Fixes b1448. For example, this could be unstable:
# ( $os ne 'win' ? ( -selectcolor => "red" ) : () ),
# | |^--K_comma
# | ^-- K_prev
# ^--- KK
# An alternative, possibly better strategy would be to try to turn
# off -vtc locally, but it turns out to be difficult to locate the
# appropriate closing token when it is not on the same line as its
# opening token.
my $K_prev = $self->K_previous_nonblank($K_comma);
if ( defined($K_prev)
&& $K_prev >= $KK
&& $rLL->[$K_prev]->[_TYPE_SEQUENCE_] )
{
my $token = $rLL->[$K_prev]->[_TOKEN_];
my $type = $rLL->[$K_prev]->[_TYPE_];
if ( $closing_vertical_tightness{$token} && $type ne 'R' ) {
## type 'R' does not normally get broken, so ignore
## skip length calculation
return 0;
}
}
my $starting_len =
$KK >= 0 ? $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_] : 0;
$length = $rLL->[$K_comma]->[_CUMULATIVE_LENGTH_] - $starting_len;
}
return $length;
} ## end sub cumulative_length_to_comma
sub xlp_collapsed_lengths {
my $self = shift;
#----------------------------------------------------------------
# Define the collapsed lengths of containers for -xlp indentation
#----------------------------------------------------------------
# We need an estimate of the minimum required line length starting at
# any opening container for the -xlp style. This is needed to avoid
# using too much indentation space for lower level containers and
# thereby running out of space for outer container tokens due to the
# maximum line length limit.
# The basic idea is that at each node in the tree we imagine that we
# have a fork with a handle and collapsible prongs:
#
# |------------
# |--------
# ------------|-------
# handle |------------
# |--------
# prongs
#
# Each prong has a minimum collapsed length. The collapsed length at a
# node is the maximum of these minimum lengths, plus the handle length.
# Each of the prongs may itself be a tree node.
# This is just a rough calculation to get an approximate starting point
# for indentation. Later routines will be more precise. It is
# important that these estimates be independent of the line breaks of
# the input stream in order to avoid instabilities.
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
my $K_start_multiline_qw;
my $level_start_multiline_qw = 0;
xlp_collapsed_lengths_initialize();
#--------------------------------
# Loop over all lines in the file
#--------------------------------
my $iline = -1;
my $skip_next_line;
foreach my $line_of_tokens ( @{$rlines} ) {
$iline++;
if ($skip_next_line) {
$skip_next_line = 0;
next;
}
my $line_type = $line_of_tokens->{_line_type};
next if ( $line_type ne 'CODE' );
my $CODE_type = $line_of_tokens->{_code_type};
# Always skip blank lines
next if ( $CODE_type eq 'BL' );
# Note on other line types:
# 'FS' (Format Skipping) lines may contain opening/closing tokens so
# we have to process them to keep the stack correctly sequenced
# 'VB' (Verbatim) lines could be skipped, but testing shows that
# results look better if we include their lengths.
# Also note that we could exclude -xlp formatting of containers with
# 'FS' and 'VB' lines, but in testing that was not really beneficial
# So we process tokens in 'FS' and 'VB' lines like all the rest...
my $rK_range = $line_of_tokens->{_rK_range};
my ( $K_first, $K_last ) = @{$rK_range};
next unless ( defined($K_first) && defined($K_last) );
my $has_comment = $rLL->[$K_last]->[_TYPE_] eq '#';
# Always ignore block comments
next if ( $has_comment && $K_first == $K_last );
# Handle an intermediate line of a multiline qw quote. These may
# require including some -ci or -i spaces. See cases c098/x063.
# Updated to check all lines (not just $K_first==$K_last) to fix
# b1316
my $K_begin_loop = $K_first;
if ( $rLL->[$K_first]->[_TYPE_] eq 'q' ) {
my $KK = $K_first;
my $level = $rLL->[$KK]->[_LEVEL_];
my $ci_level = $rLL->[$KK]->[_CI_LEVEL_];
# remember the level of the start
if ( !defined($K_start_multiline_qw) ) {
$K_start_multiline_qw = $K_first;
$level_start_multiline_qw = $level;
my $seqno_qw =
$self->[_rstarting_multiline_qw_seqno_by_K_]
->{$K_start_multiline_qw};
if ( !$seqno_qw ) {
my $Kp = $self->K_previous_nonblank($K_first);
if ( defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' ) {
$K_start_multiline_qw = $Kp;
$level_start_multiline_qw =
$rLL->[$K_start_multiline_qw]->[_LEVEL_];
}
else {
# Fix for b1319, b1320
$K_start_multiline_qw = undef;
}
}
}
if ( defined($K_start_multiline_qw) ) {
$len = $rLL->[$KK]->[_CUMULATIVE_LENGTH_] -
$rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
# We may have to add the spaces of one level or ci level
# ... it depends depends on the -xci flag, the -wn flag,
# and if the qw uses a container token as the quote
# delimiter.
# First rule: add ci if there is a $ci_level
if ($ci_level) {
$len += $rOpts_continuation_indentation;
}
# Second rule: otherwise, look for an extra indentation
# level from the start and add one indentation level if
# found.
else {
if ( $level > $level_start_multiline_qw ) {
$len += $rOpts_indent_columns;
}
}
if ( $len > $max_prong_len ) { $max_prong_len = $len }
$last_nonblank_type = 'q';
$K_begin_loop = $K_first + 1;
# We can skip to the next line if more tokens
next if ( $K_begin_loop > $K_last );
}
}
# If starting in quote type Q we have no control over indentation
# so just ignore the length of this token (see git #138)
elsif ( $rLL->[$K_first]->[_TYPE_] eq 'Q' ) {
if ( $line_of_tokens->{_starting_in_quote} ) {
$K_begin_loop = $K_first + 1;
next if ( $K_begin_loop > $K_last );
}
}
else {
}
$K_start_multiline_qw = undef;
# Find the terminal token, before any side comment
my $K_terminal = $K_last;
if ($has_comment) {
$K_terminal -= 1;
$K_terminal -= 1
if ( $rLL->[$K_terminal]->[_TYPE_] eq 'b'
&& $K_terminal > $K_first );
}
# Use length to terminal comma if interrupted list rule applies
if ( @stack && $stack[-1]->[_interrupted_list_rule_] ) {
my $K_c = $stack[-1]->[_K_c_];
if ( defined($K_c) ) {
#----------------------------------------------------------
# BEGIN patch for issue b1408: If this line ends in an
# opening token, look for the closing token and comma at
# the end of the next line. If so, combine the two lines to
# get the correct sums. This problem seems to require -xlp
# -vtc=2 and blank lines to occur. Use %is_opening_type to
# fix b1431.
#----------------------------------------------------------
if ( $is_opening_type{ $rLL->[$K_terminal]->[_TYPE_] }
&& !$has_comment )
{
my $seqno_end = $rLL->[$K_terminal]->[_TYPE_SEQUENCE_];
my $Kc_test = $rK_next_seqno_by_K->[$K_terminal];
# We are looking for a short broken remnant on the next
# line; something like the third line here (b1408):
# parent =>
# Moose::Util::TypeConstraints::find_type_constraint(
# 'RefXX' ),
# or this
#
# Help::WorkSubmitter->_filter_chores_and_maybe_warn_user(
# $story_set_all_chores),
# or this (b1431):
# $issue->{
# 'borrowernumber'}, # borrowernumber
if ( defined($Kc_test)
&& $seqno_end == $rLL->[$Kc_test]->[_TYPE_SEQUENCE_]
&& $rLL->[$Kc_test]->[_LINE_INDEX_] == $iline + 1 )
{
my $line_of_tokens_next = $rlines->[ $iline + 1 ];
my $rtype_count =
$rtype_count_by_seqno->{$seqno_end};
my ( $K_first_next, $K_terminal_next ) =
@{ $line_of_tokens_next->{_rK_range} };
# backup at a side comment
if ( defined($K_terminal_next)
&& $rLL->[$K_terminal_next]->[_TYPE_] eq '#' )
{
my $Kprev =
$self->K_previous_nonblank($K_terminal_next);
if ( defined($Kprev)
&& $Kprev >= $K_first_next )
{
$K_terminal_next = $Kprev;
}
}
if (
defined($K_terminal_next)
# next line ends with a comma
&& $rLL->[$K_terminal_next]->[_TYPE_] eq ','
# which follows the closing container token
&& (
$K_terminal_next - $Kc_test == 1
|| ( $K_terminal_next - $Kc_test == 2
&& $rLL->[ $K_terminal_next - 1 ]
->[_TYPE_] eq 'b' )
)
# no commas in the container
&& ( !defined($rtype_count)
|| !$rtype_count->{','} )
# for now, restrict this to a container with
# just 1 or two tokens
&& $K_terminal_next - $K_terminal <= 5
)
{
# combine the next line with the current line
$K_terminal = $K_terminal_next;
$skip_next_line = 1;
if (DEBUG_COLLAPSED_LENGTHS) {
print "Combining lines at line $iline\n";
}
}
}
}
#--------------------------
# END patch for issue b1408
#--------------------------
if ( $rLL->[$K_terminal]->[_TYPE_] eq ',' ) {
my $length =
$self->cumulative_length_to_comma( $K_first,
$K_terminal, $K_c );
# Fix for b1331: at a broken => item, include the
# length of the previous half of the item plus one for
# the missing space
if ( $last_nonblank_type eq '=>' ) {
$length += $len + 1;
}
if ( $length > $max_prong_len ) {
$max_prong_len = $length;
}
}
}
}
#----------------------------------
# Loop over all tokens on this line
#----------------------------------
$self->xlp_collapse_lengths_inner_loop( $iline, $K_begin_loop,
$K_terminal, $K_last );
# Now take care of any side comment;
if ($has_comment) {
if ($rOpts_ignore_side_comment_lengths) {
$len = 0;
}
else {
# For a side comment when -iscl is not set, measure length from
# the start of the previous nonblank token
my $len0 =
$K_terminal > 0
? $rLL->[ $K_terminal - 1 ]->[_CUMULATIVE_LENGTH_]
: 0;
$len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] - $len0;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
}
} ## end loop over lines
if (DEBUG_COLLAPSED_LENGTHS) {
print "\nCollapsed lengths--\n";
foreach
my $key ( sort { $a <=> $b } keys %{$rcollapsed_length_by_seqno} )
{
my $clen = $rcollapsed_length_by_seqno->{$key};
print "$key -> $clen\n";
}
}
return;
} ## end sub xlp_collapsed_lengths
sub xlp_collapse_lengths_inner_loop {
my ( $self, $iline, $K_begin_loop, $K_terminal, $K_last ) = @_;
# Loop over tokens on a line for sub xlp_collapse_lengths
# Given:
# $iline = line number in input stream
# ($K_begin_loop, $K_terminal) = token index range to scan
# $K_last = last token index on this line
my $rLL = $self->[_rLL_];
my $K_closing_container = $self->[_K_closing_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $rcollapsed_length_by_seqno = $self->[_rcollapsed_length_by_seqno_];
my $ris_permanently_broken = $self->[_ris_permanently_broken_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $rhas_broken_list = $self->[_rhas_broken_list_];
my $rtype_count_by_seqno = $self->[_rtype_count_by_seqno_];
#----------------------------------
# Loop over tokens on this line ...
#----------------------------------
my $type;
foreach my $KK ( $K_begin_loop .. $K_terminal ) {
next if ( ( $type = $rLL->[$KK]->[_TYPE_] ) eq 'b' );
#------------------------
# Handle sequenced tokens
#------------------------
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ($seqno) {
my $token = $rLL->[$KK]->[_TOKEN_];
#----------------------------
# Entering a new container...
#----------------------------
if ( $is_opening_token{$token}
&& defined( $K_closing_container->{$seqno} ) )
{
# save current prong length
$stack[-1]->[_max_prong_len_] = $max_prong_len;
$max_prong_len = 0;
# Start new prong one level deeper
my $handle_len = 0;
if ( $rblock_type_of_seqno->{$seqno} ) {
# code blocks do not use -lp indentation, but behave as
# if they had a handle of one indentation length
$handle_len = $rOpts_indent_columns;
}
else {
if ( $is_handle_type{$last_nonblank_type} ) {
$handle_len = $len;
$handle_len += 1
if ( $KK > 0
&& $rLL->[ $KK - 1 ]->[_TYPE_] eq 'b' );
}
}
# Set a flag if the 'Interrupted List Rule' will be applied
# (see sub copy_old_breakpoints).
# - Added check on has_broken_list to fix issue b1298
my $interrupted_list_rule =
$ris_permanently_broken->{$seqno}
&& $ris_list_by_seqno->{$seqno}
&& !$rhas_broken_list->{$seqno}
&& !$rOpts_ignore_old_breakpoints;
# NOTES: Since we are looking at old line numbers we have
# to be very careful not to introduce an instability.
# This following causes instability (b1288-b1296):
# $interrupted_list_rule ||=
# $rOpts_break_at_old_comma_breakpoints;
# - We could turn off the interrupted list rule if there is
# a broken sublist, to follow 'Compound List Rule 1'.
# - We could use the _rhas_broken_list_ flag for this.
# - But it seems safer not to do this, to avoid
# instability, since the broken sublist could be
# temporary. It seems better to let the formatting
# stabilize by itself after one or two iterations.
# - So, not doing this for now
# Turn off the interrupted list rule if -vmll is set and a
# list has '=>' characters. This avoids instabilities due
# to dependence on old line breaks; issue b1325.
if ( $interrupted_list_rule
&& $rOpts_variable_maximum_line_length )
{
my $rtype_count = $rtype_count_by_seqno->{$seqno};
if ( $rtype_count && $rtype_count->{'=>'} ) {
$interrupted_list_rule = 0;
}
}
my $K_c = $K_closing_container->{$seqno};
# Add length of any terminal list item if interrupted
# so that the result is the same as if the term is
# in the next line (b1446).
if (
$interrupted_list_rule
&& $KK < $K_terminal
# The line should end in a comma
# NOTE: this currently assumes break after comma.
# As long as the other call to cumulative_length..
# makes the same assumption we should remain stable.
&& $rLL->[$K_terminal]->[_TYPE_] eq ','
)
{
$max_prong_len =
$self->cumulative_length_to_comma( $KK + 1,
$K_terminal, $K_c );
}
push @stack, [
$max_prong_len,
$handle_len,
$seqno,
$iline,
$KK,
$K_c,
$interrupted_list_rule
];
}
#--------------------
# Exiting a container
#--------------------
elsif ( $is_closing_token{$token} && @stack ) {
# The current prong ends - get its handle
my $item = pop @stack;
my $handle_len = $item->[_handle_len_];
my $seqno_o = $item->[_seqno_o_];
my $iline_o = $item->[_iline_o_];
my $K_o = $item->[_K_o_];
my $K_c_expect = $item->[_K_c_];
my $collapsed_len = $max_prong_len;
if ( $seqno_o ne $seqno ) {
# This can happen if input file has brace errors.
# Otherwise it shouldn't happen. Not fatal but -lp
# formatting could get messed up.
if ( DEVEL_MODE && !get_saw_brace_error() ) {
Fault(<<EOM);
sequence numbers differ; at CLOSING line $iline, seq=$seqno, Kc=$KK .. at OPENING line $iline_o, seq=$seqno_o, Ko=$K_o, expecting Kc=$K_c_expect
EOM
}
}
#------------------------------------------
# Rules to avoid scrunching code blocks ...
#------------------------------------------
# Some test cases:
# c098/x107 x108 x110 x112 x114 x115 x117 x118 x119
my $block_type = $rblock_type_of_seqno->{$seqno};
if ($block_type) {
my $K_c = $KK;
my $block_length = MIN_BLOCK_LEN;
my $is_one_line_block;
my $level = $rLL->[$K_o]->[_LEVEL_];
if ( defined($K_o) && defined($K_c) ) {
# note: fixed 3 May 2022 (removed 'my')
$block_length =
$rLL->[ $K_c - 1 ]->[_CUMULATIVE_LENGTH_] -
$rLL->[$K_o]->[_CUMULATIVE_LENGTH_];
$is_one_line_block = $iline == $iline_o;
}
# Code block rule 1: Use the total block length if
# it is less than the minimum.
if ( $block_length < MIN_BLOCK_LEN ) {
$collapsed_len = $block_length;
}
# Code block rule 2: Use the full length of a
# one-line block to avoid breaking it, unless
# extremely long. We do not need to do a precise
# check here, because if it breaks then it will
# stay broken on later iterations.
elsif (
$is_one_line_block
&& $block_length <
$maximum_line_length_at_level[$level]
# But skip this for blocks types which can reform,
# like sort/map/grep/eval blocks, to avoid
# instability (b1345, b1428)
&& $self->is_fragile_block_type( $block_type,
$seqno )
)
{
$collapsed_len = $block_length;
}
# Code block rule 3: Otherwise the length should be
# at least MIN_BLOCK_LEN to avoid scrunching code
# blocks.
elsif ( $collapsed_len < MIN_BLOCK_LEN ) {
$collapsed_len = MIN_BLOCK_LEN;
}
else {
# none of these rules applies
}
}
# Store the result. Some extra space, '2', allows for
# length of an opening token, inside space, comma, ...
# This constant has been tuned to give good overall
# results.
$collapsed_len += 2;
$rcollapsed_length_by_seqno->{$seqno} = $collapsed_len;
# Restart scanning the lower level prong
if (@stack) {
$max_prong_len = $stack[-1]->[_max_prong_len_];
$collapsed_len += $handle_len;
if ( $collapsed_len > $max_prong_len ) {
$max_prong_len = $collapsed_len;
}
}
}
# it is a ternary or input file is unbalanced
else {
}
$len = 0;
$last_nonblank_type = $type;
next;
}
#----------------------------
# Handle non-container tokens
#----------------------------
my $token_length = $rLL->[$KK]->[_TOKEN_LENGTH_];
# Count lengths of things like 'xx => yy' as a single item
if ( $type eq '=>' ) {
$len += $token_length + 1;
# fix $len for -naws, issue b1457
if ( !$rOpts_add_whitespace ) {
if ( defined( $rLL->[ $KK + 1 ] )
&& $rLL->[ $KK + 1 ]->[_TYPE_] ne 'b' )
{
$len -= 1;
}
}
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
elsif ( $last_nonblank_type eq '=>' ) {
$len += $token_length;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
# but only include one => per item
$len = $token_length;
}
# include everything to end of line after a here target
elsif ( $type eq 'h' ) {
$len = $rLL->[$K_last]->[_CUMULATIVE_LENGTH_] -
$rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
# for everything else just use the token length
else {
$len = $token_length;
if ( $len > $max_prong_len ) { $max_prong_len = $len }
}
$last_nonblank_type = $type;
} ## end loop over tokens on this line
return;
} ## end sub xlp_collapse_lengths_inner_loop
} ## end closure xlp_collapsed_lengths
sub is_excluded_lp {
my ( $self, $KK ) = @_;
# Decide if this container is excluded by user request
# Given:
# $KK = index of the container opening token
# Return:
# true if this token is excluded (i.e., may not use -lp)
# false otherwise
# The control hash can either describe:
# what to exclude: $line_up_parentheses_control_is_lpxl = 1, or
# what to include: $line_up_parentheses_control_is_lpxl = 0
my $rLL = $self->[_rLL_];
my $rtoken_vars = $rLL->[$KK];
my $token = $rtoken_vars->[_TOKEN_];
my $rflags = $line_up_parentheses_control_hash{$token};
#-----------------------------------------------
# TEST #1: check match to listed container types
#-----------------------------------------------
if ( !defined($rflags) ) {
# There is no entry for this container, so we are done
return !$line_up_parentheses_control_is_lpxl;
}
my ( $flag1, $flag2 ) = @{$rflags};
#-----------------------------------------------------------
# TEST #2: check match to flag1, the preceding nonblank word
#-----------------------------------------------------------
my $match_flag1 = !defined($flag1) || $flag1 eq '*';
if ( !$match_flag1 ) {
# Find the previous token
my ( $is_f, $is_k, $is_w );
my $Kp = $self->K_previous_nonblank($KK);
if ( defined($Kp) ) {
my $type_p = $rLL->[$Kp]->[_TYPE_];
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
# keyword?
$is_k = $type_p eq 'k';
# function call?
$is_f = $self->[_ris_function_call_paren_]->{$seqno};
# either keyword or function call?
$is_w = $is_k || $is_f;
}
# Check for match based on flag1 and the previous token:
if ( $flag1 eq 'k' ) { $match_flag1 = $is_k }
elsif ( $flag1 eq 'K' ) { $match_flag1 = !$is_k }
elsif ( $flag1 eq 'f' ) { $match_flag1 = $is_f }
elsif ( $flag1 eq 'F' ) { $match_flag1 = !$is_f }
elsif ( $flag1 eq 'w' ) { $match_flag1 = $is_w }
elsif ( $flag1 eq 'W' ) { $match_flag1 = !$is_w }
else {
## no match
DEVEL_MODE && Fault(<<EOM);
unexpected --lp-exclusion code '$flag1': expecting one of kKfFwW
EOM
}
}
# See if we can exclude this based on the flag1 test...
if ($line_up_parentheses_control_is_lpxl) {
return 1 if ($match_flag1);
}
else {
return 1 if ( !$match_flag1 );
}
#-------------------------------------------------------------
# TEST #3: exclusion based on flag2 and the container contents
#-------------------------------------------------------------
# Note that this is an exclusion test for both -lpxl or -lpil input methods
# The options are:
# 0 or blank: ignore container contents
# 1 exclude non-lists or lists with sublists
# 2 same as 1 but also exclude lists with code blocks
my $match_flag2;
if ($flag2) {
my $seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
my $has_list = $self->[_rhas_list_]->{$seqno};
my $has_code_block = $self->[_rhas_code_block_]->{$seqno};
my $has_ternary = $self->[_rhas_ternary_]->{$seqno};
if ( !$is_list
|| $has_list
|| $flag2 eq '2' && ( $has_code_block || $has_ternary ) )
{
$match_flag2 = 1;
}
}
return $match_flag2;
} ## end sub is_excluded_lp
sub set_excluded_lp_containers {
my ($self) = @_;
return unless ($rOpts_line_up_parentheses);
my $rLL = $self->[_rLL_];
return unless ( defined($rLL) && @{$rLL} );
my $K_opening_container = $self->[_K_opening_container_];
my $ris_excluded_lp_container = $self->[_ris_excluded_lp_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
foreach my $seqno ( keys %{$K_opening_container} ) {
# code blocks are always excluded by the -lp coding so we can skip them
next if ( $rblock_type_of_seqno->{$seqno} );
my $KK = $K_opening_container->{$seqno};
next unless defined($KK);
# see if a user exclusion rule turns off -lp for this container
if ( $self->is_excluded_lp($KK) ) {
$ris_excluded_lp_container->{$seqno} = 1;
}
}
return;
} ## end sub set_excluded_lp_containers
sub keep_old_blank_lines_exclusions {
my ( $self, $rwant_blank_line_after ) = @_;
# Set a flag to remove selected blank lines from the input stream
return if ( !%keep_old_blank_lines_exceptions );
my $top_control = $keep_old_blank_lines_exceptions{top};
my $bottom_control = $keep_old_blank_lines_exceptions{bottom};
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $i_first_blank; # first blank of a group
my $i_last_blank; # last blank of a group
my $line_CODE_info = sub {
# Given:
# $ii = index of a line
# Return:
# undef if not a line of code, or
# {Kfirst=>$Kfirst, Klast=>$Klast, CODE_type=>$CODE_type}
my ($ii) = @_;
return if ( $ii < 0 );
my $line_of_tokens = $rlines->[$ii];
my $line_type = $line_of_tokens->{_line_type};
return if ( $line_type ne 'CODE' );
my $CODE_type = $line_of_tokens->{_code_type};
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
return if ( !defined($Klast) );
return {
Kfirst => $Kfirst,
Klast => $Klast,
CODE_type => $CODE_type,
};
}; ## end $line_CODE_info = sub
my $top_match = sub {
my ($ii) = @_;
# Decide if line at index $ii matches the criterion in the control hash
# for deleting blank lines which follow this line.
# Possible top tests are : '{b' '}b' '#b'
# where 'b' denotes the blank line position
# Given:
# $ii = index of a line of code to be checked
# Return:
# false if no match
# 1 if match without restriction on blank line removal
# -1 for match which requires keeping 1 essential blank line
my $line_of_tokens = $rlines->[$ii];
my $line_type = $line_of_tokens->{_line_type};
return if ( $line_type ne 'CODE' );
# Note that we could also check for block comments here
# my $CODE_type = $line_of_tokens->{_code_type};
# return if ($CODE_type);
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
return if ( !defined($Klast) );
my $type_last = $rLL->[$Klast]->[_TYPE_];
# See if line has a comment
my $Klast_true = $Klast;
if ( $type_last eq '#' ) {
# For a full line comment...
if ( $Klast eq $Kfirst ) {
# Check for a block comment control, type '#b'
if ( $top_control->{$type_last} ) {
# Always keep 1 blank line if the lines above and below
# the blank lines are full-line comments
my $Kn = $self->K_next_nonblank($Klast);
return ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq '#' )
? -1
: 1;
}
# Nothing to do
return;
}
# For a side comment .. back up 1 token
$Klast = $self->K_previous_nonblank($Klast);
return if ( !defined($Klast) || $Klast < $Kfirst );
$type_last = $rLL->[$Klast]->[_TYPE_];
}
# Check for possible top test
if ( $top_control->{$type_last} ) {
# '{b' = inverse of -blao=i
# '}b' not an inverse but uses the -blao pattern if set
if ( $type_last ne '{' && $type_last ne '}' ) {
## unexpected type - shouldn't happen
DEVEL_MODE && Fault("Unexpected top test type: '$type_last'\n");
return;
}
my $seqno = $rLL->[$Klast]->[_TYPE_SEQUENCE_];
return if ( !$seqno );
my $block_type = $rblock_type_of_seqno->{$seqno};
if ( $block_type
&& $block_type =~ /$blank_lines_after_opening_block_pattern/ )
{
# This is a match ...
# Ok to delete all blanks if no side comment here
if ( $Klast_true == $Klast ) { return 1 }
# Ok to delete all blanks if no block comment ahead
my $Kn = $self->K_next_nonblank($Klast_true);
if ( !defined($Kn) || $rLL->[$Kn]->[_TYPE_] ne '#' ) {
return 1;
}
# If there is code below it is a block comment of some type.
# We must leave 1 blank if it is possible to form a hanging
# side comment.
# Ok to delete all blanks if this side comment is static
my $token = $rLL->[$Klast_true]->[_TOKEN_];
if ( $token =~ /$static_side_comment_pattern/ ) { return 1 }
my $rinfo = $line_CODE_info->( $i_last_blank + 1 );
# If no code below, then ok to delete blanks
return 1 if ( !defined($rinfo) );
# If static block comment below, ok to delete blanks
my $CODE_type_bottom = $rinfo->{CODE_type};
if ( $CODE_type_bottom eq 'SBC' || $CODE_type_bottom eq 'SBCX' )
{
return 1;
}
# The top line has simple side comment, the bottom line is a
# non-static comment, so we must keep at least 1 blank line to
# avoid forming a hanging side comment. This logic is slightly
# simplified but on the safe side.
return -1;
}
# Not a match
return;
}
# Not a match
return;
}; ## end $top_match = sub
my $bottom_match = sub {
my ($ii) = @_;
# Decide if line at index $ii matches the criterion in the control hash
# for deleting blank lines which precede this line.
# Possible bottom tests are : 'b#' 'b{' 'b}' 'bS' 'bP'
# where 'b' denotes the blank line position, S=sub, P=package
# Given:
# $ii = index of a line of code to be checked
# Return:
# false if no match
# 1 if match without restriction
# -1 for match which requires keeping 1 essential blank line
my $line_of_tokens = $rlines->[$ii];
my $line_type = $line_of_tokens->{_line_type};
return if ( $line_type ne 'CODE' );
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast ) = @{$rK_range};
return if ( !defined($Klast) );
my $type_last = $rLL->[$Klast]->[_TYPE_];
if ( $type_last eq '#' ) {
# Handle a full-line comment
if ( $Klast eq $Kfirst ) {
# Check for a block comment 'b#'
if ( $bottom_control->{$type_last} ) {
# This bottom line is a comment. Now check for comments
# above. Quick check:
my $Kp = $self->K_previous_nonblank($Kfirst);
if ( !defined($Kp) || $rLL->[$Kp]->[_TYPE_] ne '#' ) {
return 1;
}
# Only upper comment is possible
my $rinfo = $line_CODE_info->( $i_first_blank - 1 );
# No code above - ok to delete blanks
return 1 if ( !defined($rinfo) );
my $Kfirst_top = $rinfo->{Kfirst};
my $Klast_top = $rinfo->{Klast};
# If full line comment above then we must keep one blank
if ( $Kfirst_top == $Klast_top ) { return -1 }
# We should have a side comment above by the preliminary
# check
my $type_top = $rLL->[$Klast_top]->[_TYPE_];
return 1 if ( $type_top ne '#' ); ## shouldn't happen
# A static side comment above cannot form hanging side
# comment below - ok to remove all blank lines.
my $token_top = $rLL->[$Klast_top]->[_TOKEN_];
if ( $token_top =~ /$static_side_comment_pattern/ ) {
return 1;
}
# A static block comment below cannot form hanging side
# comment - ok to remove all blank lines.
my $CODE_type = $line_of_tokens->{_code_type};
if ( $CODE_type eq 'SBC' || $CODE_type eq 'SBCX' ) {
return 1;
}
# A new hanging side comment could be formed if we remove
# all blank lines, so we must leave 1
return -1;
}
# Not a match
return;
}
# This line has a side comment .. back up 1 token
$Klast = $self->K_previous_nonblank($Klast);
return if ( !defined($Klast) || $Klast < $Kfirst );
$type_last = $rLL->[$Klast]->[_TYPE_];
}
# Bottom tests: 'b{' 'b}' 'bS' 'bP'
# All of these are based on the first token of the line following
# the blank lines.
my $token_first = $rLL->[$Kfirst]->[_TOKEN_];
my $type_first = $rLL->[$Kfirst]->[_TYPE_];
# Special check for case 'b{', inverse of -bbb
if ( $type_first eq 'k' ) {
if ( $bottom_control->{'{'}
&& $is_if_unless_while_until_for_foreach{$token_first}
&& !$rLL->[$Kfirst]->[_CI_LEVEL_] )
{
# NOTE: we check ci to insure that this is not a trailing
# operation, but no checks are currently made to see if this is
# a one-line block. So this will remove more blanks
# than the corresponding -bbb option adds.
return 1;
}
# Apply 'S' to BEGIN and END blocks to make the inverse of -bbs
if ( $bottom_control->{'S'} ) {
if ( $token_first eq 'BEGIN' || $token_first eq 'END' ) {
return 1;
}
}
return;
}
# For other tests 'b}' 'bS' 'bP' the token types match
if ( $bottom_control->{$type_first} ) {
# 'b}' inverse of -blbc
if ( $type_first eq '}' ) {
my $seqno = $rLL->[$Kfirst]->[_TYPE_SEQUENCE_];
return if ( !$seqno );
my $block_type = $rblock_type_of_seqno->{$seqno};
return if ( !$block_type );
if (
$block_type =~ /$blank_lines_before_closing_block_pattern/ )
{
return 1;
}
}
elsif ( $type_first eq 'P' ) { return 1 }
elsif ( $type_first eq 'S' ) {
# NOTE: no checks are currently made to see if this is a
# one-line or multi-line sub. So this will remove more blanks
# than the corresponding -bbs option adds. And see above patch
# which makes this work for BEGIN and END blocks.
return 1;
}
else {
## unexpected type
}
}
return;
}; ## end $bottom_match = sub
my $end_blank_group = sub {
my ( ($ending_in_blank) ) = @_;
# Decide if the blank lines group in the index range
# $i_first_blank .. $i_last_blank should be deleted.
# Given:
# $ending_in_blank = true if the last blank is the end of file
# false if not
# Return:
# true if this group should be deleted
# false if not
if ( !defined($i_first_blank) || !defined($i_last_blank) ) { return }
# Check code line before start of blank group
my $delete_blanks;
if ( $top_control && $i_first_blank > 0 ) {
$delete_blanks = $top_match->( $i_first_blank - 1 );
}
# Check code line after end of blank group
if ( !$delete_blanks && $bottom_control && !$ending_in_blank ) {
$delete_blanks =
$bottom_match->( $i_last_blank + 1, $bottom_control );
}
# Signal deletion by setting the deletion flag for this group
if ($delete_blanks) {
# A negative $delete_blanks flag indicates to keep 1 essential blank
# See b1504 for example of conflict with kgb logic
if ( $delete_blanks < 0 ) { $i_first_blank++ }
foreach my $ii ( $i_first_blank .. $i_last_blank ) {
if ( !defined( $rwant_blank_line_after->{$ii} ) ) {
$rwant_blank_line_after->{$ii} = 2;
}
}
}
$i_first_blank = undef;
$i_last_blank = undef;
return;
}; ## end $end_blank_group = sub
# Main loop to locate groups of blank lines and decide if they
# they should be deleted
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
$i++;
my $line_type = $line_of_tokens->{_line_type};
if ( $line_type ne 'CODE' ) {
if ( defined($i_last_blank) ) {
$end_blank_group->();
}
next;
}
my $CODE_type = $line_of_tokens->{_code_type};
if ( $CODE_type eq 'BL' ) {
if ( !defined($i_first_blank) ) {
$i_first_blank = $i;
}
$i_last_blank = $i;
}
else {
if ( defined($i_first_blank) ) {
$end_blank_group->();
}
}
}
if ( defined($i_first_blank) ) {
$end_blank_group->(1);
}
return;
} ## end sub keep_old_blank_lines_exclusions
######################################
# CODE SECTION 6: Process line-by-line
######################################
sub process_all_lines {
my $self = shift;
#----------------------------------------------------------
# Main loop to format all lines of a file according to type
#----------------------------------------------------------
my $rlines = $self->[_rlines_];
my $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'};
my $file_writer_object = $self->[_file_writer_object_];
my $logger_object = $self->[_logger_object_];
my $vertical_aligner_object = $self->[_vertical_aligner_object_];
my $save_logfile = $self->[_save_logfile_];
# Flag to prevent blank lines when POD occurs in a format skipping sect.
my $in_format_skipping_section;
# set locations for blanks around long runs of keywords
my $rwant_blank_line_after = $self->keyword_group_scan();
$self->keep_old_blank_lines_exclusions($rwant_blank_line_after)
if ( $rOpts_keep_old_blank_lines == 1 );
my $line_type = EMPTY_STRING;
my $i_last_POD_END = -10;
my $i = -1;
foreach my $line_of_tokens ( @{$rlines} ) {
# insert blank lines requested for keyword sequences
if ( defined( $rwant_blank_line_after->{$i} )
&& $rwant_blank_line_after->{$i} == 1 )
{
$self->want_blank_line();
}
$i++;
my $last_line_type = $line_type;
$line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# POD_START - line starting pod, such as '=head'
# POD - pod documentation text
# POD_END - last line of pod section, '=cut'
# HERE - text of here-document
# HERE_END - last line of here-doc (target word)
# FORMAT - format section
# FORMAT_END - last line of format section, '.'
# SKIP - code skipping section
# SKIP_END - last line of code skipping section, '#>>V'
# DATA_START - __DATA__ line
# DATA - unidentified text following __DATA__
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
# put a blank line after an =cut which comes before __END__ and __DATA__
# (required by podchecker)
if ( $last_line_type eq 'POD_END' && !$self->[_saw_END_or_DATA_] ) {
$i_last_POD_END = $i;
$file_writer_object->reset_consecutive_blank_lines();
if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
$self->want_blank_line();
}
}
# handle line of code..
if ( $line_type eq 'CODE' ) {
my $CODE_type = $line_of_tokens->{_code_type};
$in_format_skipping_section = $CODE_type eq 'FS';
# Handle blank lines
if ( $CODE_type eq 'BL' ) {
# Keep this blank? Start with the flag -kbl=n, where
# n=0 ignore all old blank lines
# n=1 stable: keep old blanks, but limited by -mbl=n
# n=2 keep all old blank lines, regardless of -mbl=n
# If n=0 we delete all old blank lines and let blank line
# rules generate any needed blank lines.
my $kgb_keep = $rOpts_keep_old_blank_lines;
# Then delete lines requested by the keyword-group logic if
# allowed
if ( $kgb_keep == 1
&& defined( $rwant_blank_line_after->{$i} )
&& $rwant_blank_line_after->{$i} == 2 )
{
$kgb_keep = 0;
}
# But always keep a blank line following an =cut
if ( $i - $i_last_POD_END < 3 && !$kgb_keep ) {
$kgb_keep = 1;
}
if ($kgb_keep) {
$self->flush($CODE_type);
$file_writer_object->write_blank_code_line(
$rOpts_keep_old_blank_lines == 2 );
$self->[_last_line_leading_type_] = 'b';
}
next;
}
else {
# Let logger see all non-blank lines of code. This is a slow
# operation so we avoid it if it is not going to be saved.
if ( $save_logfile && $logger_object ) {
# get updated indentation levels
my $rK_range = $line_of_tokens->{_rK_range};
my ( $Kfirst, $Klast_uu ) = @{$rK_range};
if ( defined($Kfirst) ) {
my $level_0 = $self->[_radjusted_levels_]->[$Kfirst];
my $ci_level_0 =
$self->[_rLL_]->[$Kfirst]->[_CI_LEVEL_];
$line_of_tokens->{_level_0} = $level_0;
$line_of_tokens->{_ci_level_0} = $ci_level_0;
}
$logger_object->black_box( $line_of_tokens,
$vertical_aligner_object->get_output_line_number() );
}
}
# Handle Format Skipping (FS) and Verbatim (VB) Lines
if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
$self->write_unindented_line($input_line);
$file_writer_object->reset_consecutive_blank_lines();
next;
}
# Handle all other lines of code
$self->process_line_of_CODE($line_of_tokens);
}
# handle line of non-code..
else {
# set special flags
my $skip_line = 0;
if ( substr( $line_type, 0, 3 ) eq 'POD' ) {
# Pod docs should have a preceding blank line. But stay
# out of __END__ and __DATA__ sections, because
# the user may be using this section for any purpose whatsoever
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'trim-pod'} ) {
chomp $input_line;
$input_line =~ s/\s+$//;
$input_line .= "\n";
}
if ( !$skip_line
&& !$in_format_skipping_section
&& $line_type eq 'POD_START'
&& !$self->[_saw_END_or_DATA_] )
{
$self->want_blank_line();
}
}
# leave the blank counters in a predictable state
# after __END__ or __DATA__
elsif ( $line_type eq 'END_START' || $line_type eq 'DATA_START' ) {
$file_writer_object->reset_consecutive_blank_lines();
$self->[_saw_END_or_DATA_] = 1;
}
# Patch to avoid losing blank lines after a code-skipping block;
# fixes case c047.
elsif ( $line_type eq 'SKIP_END' ) {
$file_writer_object->reset_consecutive_blank_lines();
}
else {
## some other line type
}
# write unindented non-code line
if ( !$skip_line ) {
$self->write_unindented_line($input_line);
}
}
}
return;
} ## end sub process_all_lines
{ ## closure keyword_group_scan
# this is the return var
my $rhash_of_desires;
# user option variables for -kgb
my (
$rOpts_kgb_after,
$rOpts_kgb_before,
$rOpts_kgb_delete,
$rOpts_kgb_inside,
$rOpts_kgb_size_max,
$rOpts_kgb_size_min,
);
# group variables, initialized by kgb_initialize_group_vars
my ( $ibeg, $iend, $count, $level_beg, $K_closing );
my ( @iblanks, @group, @subgroup );
# line variables, updated by sub keyword_group_scan
my ( $line_type, $CODE_type, $K_first, $K_last );
my $number_of_groups_seen;
#------------------------
# -kgb helper subroutines
#------------------------
sub kgb_initialize_options {
# check and initialize user options for -kgb
# return error flag:
# true for some input error, do not continue
# false if ok
# Local copies of the various control parameters
$rOpts_kgb_after = $rOpts->{'keyword-group-blanks-after'}; # '-kgba'
$rOpts_kgb_before = $rOpts->{'keyword-group-blanks-before'}; # '-kgbb'
$rOpts_kgb_delete = $rOpts->{'keyword-group-blanks-delete'}; # '-kgbd'
$rOpts_kgb_inside = $rOpts->{'keyword-group-blanks-inside'}; # '-kgbi'
# A range of sizes can be input with decimal notation like 'min.max'
# with any number of dots between the two numbers. Examples:
# string => min max matches
# 1.1 1 1 exactly 1
# 1.3 1 3 1,2, or 3
# 1..3 1 3 1,2, or 3
# 5 5 - 5 or more
# 6. 6 - 6 or more
# .2 - 2 up to 2
# 1.0 1 0 nothing
my $rOpts_kgb_size = $rOpts->{'keyword-group-blanks-size'}; # '-kgbs'
( $rOpts_kgb_size_min, $rOpts_kgb_size_max ) = split /\.+/,
$rOpts_kgb_size;
if ( $rOpts_kgb_size_min && $rOpts_kgb_size_min !~ /^\d+$/
|| $rOpts_kgb_size_max && $rOpts_kgb_size_max !~ /^\d+$/ )
{
Warn(<<EOM);
Unexpected value for -kgbs: '$rOpts_kgb_size'; expecting 'min' or 'min.max';
ignoring all -kgb flags
EOM
# Turn this option off so that this message does not keep repeating
# during iterations and other files.
$rOpts->{'keyword-group-blanks-size'} = EMPTY_STRING;
return $rhash_of_desires;
}
$rOpts_kgb_size_min = 1 unless ($rOpts_kgb_size_min);
if ( $rOpts_kgb_size_max && $rOpts_kgb_size_max < $rOpts_kgb_size_min )
{
return $rhash_of_desires;
}
# check codes for $rOpts_kgb_before and
# $rOpts_kgb_after:
# 0 = never (delete if exist)
# 1 = stable (keep unchanged)
# 2 = always (insert if missing)
my $ok = $rOpts_kgb_size_min > 0
&& ( $rOpts_kgb_before != 1
|| $rOpts_kgb_after != 1
|| $rOpts_kgb_inside
|| $rOpts_kgb_delete );
return $rhash_of_desires if ( !$ok );
# The following parameter combination can be unstable (c302):
if ( $rOpts_kgb_size_max
&& $rOpts_kgb_after == INSERT
&& $rOpts_kgb_before == DELETE )
{
# We reset kgb_before=STABLE to fix and continue
$rOpts_kgb_before = STABLE;
}
return;
} ## end sub kgb_initialize_options
sub kgb_initialize_group_vars {
# Definitions:
# $ibeg = first line index of this entire group
# $iend = last line index of this entire group
# $count = total number of keywords seen in this entire group
# $level_beg = indentation level of this group
# @group = [ $i, $token, $count ] =list of all keywords & blanks
# @subgroup = $j, index of group where token changes
# @iblanks = line indexes of blank lines in input stream in this group
# where i=starting line index
# token (the keyword)
# count = number of this token in this subgroup
# j = index in group where token changes
$ibeg = -1;
$iend = undef;
$level_beg = -1;
$K_closing = undef;
$count = 0;
@group = ();
@subgroup = ();
@iblanks = ();
return;
} ## end sub kgb_initialize_group_vars
sub kgb_initialize_line_vars {
$CODE_type = EMPTY_STRING;
$K_first = undef;
$K_last = undef;
$line_type = EMPTY_STRING;
return;
} ## end sub kgb_initialize_line_vars
sub kgb_initialize {
# initialize all closure variables for -kgb
# return:
# true to cause immediate exit (something is wrong)
# false to continue ... all is okay
# This is the return variable:
$rhash_of_desires = {};
# initialize and check user options;
my $quit = kgb_initialize_options();
if ($quit) { return $quit }
# initialize variables for the current group and subgroups:
kgb_initialize_group_vars();
# initialize variables for the most recently seen line:
kgb_initialize_line_vars();
$number_of_groups_seen = 0;
# all okay
return;
} ## end sub kgb_initialize
sub kgb_insert_blank_after {
my ($i) = @_;
# Given:
# $i = line number after which blank is requested
$rhash_of_desires->{$i} = 1;
my $ip = $i + 1;
if ( defined( $rhash_of_desires->{$ip} )
&& $rhash_of_desires->{$ip} == 2 )
{
$rhash_of_desires->{$ip} = 0;
}
return;
} ## end sub kgb_insert_blank_after
sub kgb_split_into_sub_groups {
# place blanks around long sub-groups of keywords
# ...if requested
return unless ($rOpts_kgb_inside);
# loop over sub-groups, index k
push @subgroup, scalar(@group);
my $kbeg = 1;
my $kend = @subgroup - 1;
foreach my $k ( $kbeg .. $kend ) {
# index j runs through all keywords found
my $j_b = $subgroup[ $k - 1 ];
my $j_e = $subgroup[$k] - 1;
# index i is the actual line number of a keyword
my ( $i_b, $tok_b_uu, $count_b ) = @{ $group[$j_b] };
my ( $i_e_uu, $tok_e_uu, $count_e ) = @{ $group[$j_e] };
my $num = $count_e - $count_b + 1;
# This subgroup runs from line $ib to line $ie-1, but may contain
# blank lines
if ( $num >= $rOpts_kgb_size_min ) {
# if there are blank lines, we require that at least $num lines
# be non-blank up to the boundary with the next subgroup.
my $nog_b = my $nog_e = 1;
if ( @iblanks && !$rOpts_kgb_delete ) {
my $j_bb = $j_b + $num - 1;
my ( $i_bb_uu, $tok_bb_uu, $count_bb ) = @{ $group[$j_bb] };
$nog_b = $count_bb - $count_b + 1 == $num;
my $j_ee = $j_e - ( $num - 1 );
my ( $i_ee_uu, $tok_ee_uu, $count_ee ) = @{ $group[$j_ee] };
$nog_e = $count_e - $count_ee + 1 == $num;
}
if ( $nog_b && $k > $kbeg ) {
kgb_insert_blank_after( $i_b - 1 );
}
if ( $nog_e && $k < $kend ) {
my ( $i_ep, $tok_ep_uu, $count_ep_uu ) =
@{ $group[ $j_e + 1 ] };
kgb_insert_blank_after( $i_ep - 1 );
}
}
}
return;
} ## end sub kgb_split_into_sub_groups
sub kgb_delete_if_blank {
my ( $self, $i ) = @_;
# delete line $i if it is blank
my $rlines = $self->[_rlines_];
return if ( $i < 0 || $i >= @{$rlines} );
return if ( $rlines->[$i]->{_line_type} ne 'CODE' );
my $code_type = $rlines->[$i]->{_code_type};
if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
return;
} ## end sub kgb_delete_if_blank
sub kgb_delete_inner_blank_lines {
# always remove unwanted trailing blank lines from our list
return unless (@iblanks);
while (@iblanks) {
my $ibl = pop @iblanks;
if ( $ibl < $iend ) { push @iblanks, $ibl; last }
$iend = $ibl;
}
# now mark mark interior blank lines for deletion if requested
return unless ($rOpts_kgb_delete);
while (@iblanks) {
my $ibl = pop @iblanks;
$rhash_of_desires->{$ibl} = 2;
}
return;
} ## end sub kgb_delete_inner_blank_lines
sub kgb_end_group {
my ( $self, ($bad_ending) ) = @_;
# End a group of keywords
# Given:
# $bad_ending = false if group ends ok
# true if group ends badly (strange pattern)
if ( defined($ibeg) && $ibeg >= 0 ) {
# then handle sufficiently large groups
if ( $count >= $rOpts_kgb_size_min ) {
$number_of_groups_seen++;
# do any blank deletions regardless of the count
kgb_delete_inner_blank_lines();
my $rlines = $self->[_rlines_];
if ( $ibeg > 0 ) {
my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
# patch for hash bang line which is not currently marked as
# a comment; mark it as a comment
if ( $ibeg == 1 && !$code_type ) {
my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
$code_type = 'BC'
if ( $line_text && $line_text =~ /^#/ );
}
# Do not insert a blank after a comment
# (this could be subject to a flag in the future)
if ( $code_type !~ /(?:BC|SBC|SBCX)/ ) {
if ( $rOpts_kgb_before == INSERT ) {
kgb_insert_blank_after( $ibeg - 1 );
}
elsif ( $rOpts_kgb_before == DELETE ) {
$self->kgb_delete_if_blank( $ibeg - 1 );
}
else {
## == STABLE
}
}
}
# We will only put blanks before code lines. We could loosen
# this rule a little, but we have to be very careful because
# for example we certainly don't want to drop a blank line
# after a line like this:
# my $var = <<EOM;
if ( $line_type eq 'CODE' && defined($K_first) ) {
# - Do not put a blank before a line of different level
# - Do not put a blank line if we ended the search badly
# - Do not put a blank at the end of the file
# - Do not put a blank line before a hanging side comment
my $rLL = $self->[_rLL_];
my $level = $rLL->[$K_first]->[_LEVEL_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
if ( $level == $level_beg
&& $ci_level == 0
&& !$bad_ending
&& $iend < @{$rlines}
&& $CODE_type ne 'HSC' )
{
if ( $rOpts_kgb_after == INSERT ) {
kgb_insert_blank_after($iend);
}
elsif ( $rOpts_kgb_after == DELETE ) {
$self->kgb_delete_if_blank( $iend + 1 );
}
else {
## == STABLE
}
}
}
}
kgb_split_into_sub_groups();
}
# reset for another group
kgb_initialize_group_vars();
return;
} ## end sub kgb_end_group
sub kgb_find_container_end {
my ($self) = @_;
# If the keyword line is continued onto subsequent lines, find the
# closing token '$K_closing' so that we can easily skip past the
# contents of the container.
# We only set this value if we find a simple list, meaning
# -contents only one level deep
# -not welded
# First check: skip if next line is not one deeper
my $Knext_nonblank = $self->K_next_nonblank($K_last);
return if ( !defined($Knext_nonblank) );
my $rLL = $self->[_rLL_];
my $level_next = $rLL->[$Knext_nonblank]->[_LEVEL_];
return if ( $level_next != $level_beg + 1 );
# Find the parent container of the first token on the next line
my $parent_seqno = $self->parent_seqno_by_K($Knext_nonblank);
return unless ( defined($parent_seqno) );
# Must not be a weld (can be unstable)
return
if ( $total_weld_count
&& $self->is_welded_at_seqno($parent_seqno) );
# Opening container must exist and be on this line
my $Ko = $self->[_K_opening_container_]->{$parent_seqno};
return if ( !defined($Ko) || $Ko <= $K_first || $Ko > $K_last );
# Verify that the closing container exists and is on a later line
my $Kc = $self->[_K_closing_container_]->{$parent_seqno};
return if ( !defined($Kc) || $Kc <= $K_last );
# That's it
$K_closing = $Kc;
return;
} ## end sub kgb_find_container_end
sub kgb_add_to_group {
my ( $self, $i, $token, $level ) = @_;
# End the previous group if we have reached the maximum
# group size
if ( $rOpts_kgb_size_max && @group >= $rOpts_kgb_size_max ) {
$self->kgb_end_group();
}
if ( @group == 0 ) {
$ibeg = $i;
$level_beg = $level;
$count = 0;
}
$count++;
$iend = $i;
# New sub-group?
if ( !@group || $token ne $group[-1]->[1] ) {
push @subgroup, scalar(@group);
}
push @group, [ $i, $token, $count ];
# remember if this line ends in an open container
$self->kgb_find_container_end();
return;
} ## end sub kgb_add_to_group
sub keyword_group_scan {
my $self = shift;
# Called once per file to process --keyword-group-blanks-* parameters.
# This is the main subroutine for the -kgb option
# Task:
# Manipulate blank lines around keyword groups (kgb* flags)
# Scan all lines looking for runs of consecutive lines beginning with
# selected keywords. Example keywords are 'my', 'our', 'local', ... but
# they may be anything. We will set flags requesting that blanks be
# inserted around and within them according to input parameters. Note
# that we are scanning the lines as they came in in the input stream, so
# they are not necessarily well formatted.
# Returns:
# The output of this sub is a return hash ref whose keys are the indexes
# of lines after which we desire a blank line. For line index $i:
# $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
# $rhash_of_desires->{$i} = 2 means we want blank line $i removed
# Nothing to do if no blanks can be output. This test added to fix
# case b760.
if ( !$rOpts_maximum_consecutive_blank_lines ) {
return $rhash_of_desires;
}
#---------------
# initialization
#---------------
my $quit = kgb_initialize();
if ($quit) { return $rhash_of_desires }
my $rLL = $self->[_rLL_];
my $rlines = $self->[_rlines_];
$self->kgb_end_group();
my $i = -1;
my $Opt_repeat_count =
$rOpts->{'keyword-group-blanks-repeat-count'}; # '-kgbr'
#----------------------------------
# loop over all lines of the source
#----------------------------------
foreach my $line_of_tokens ( @{$rlines} ) {
$i++;
last
if ( $Opt_repeat_count > 0
&& $number_of_groups_seen >= $Opt_repeat_count );
kgb_initialize_line_vars();
$line_type = $line_of_tokens->{_line_type};
# always end a group at non-CODE
if ( $line_type ne 'CODE' ) { $self->kgb_end_group(); next }
$CODE_type = $line_of_tokens->{_code_type};
# end any group at a format skipping line
if ( $CODE_type && $CODE_type eq 'FS' ) {
$self->kgb_end_group();
next;
}
# continue in a verbatim (VB) type; it may be quoted text
if ( $CODE_type eq 'VB' ) {
if ( $ibeg >= 0 ) { $iend = $i; }
next;
}
# and continue in blank (BL) types
if ( $CODE_type eq 'BL' ) {
if ( $ibeg >= 0 ) {
$iend = $i;
push @iblanks, $i;
# propagate current subgroup token
my $tok = $group[-1]->[1];
push @group, [ $i, $tok, $count ];
}
next;
}
# examine the first token of this line
my $rK_range = $line_of_tokens->{_rK_range};
( $K_first, $K_last ) = @{$rK_range};
if ( !defined($K_first) ) {
# Somewhat unexpected blank line..
# $rK_range is normally defined for line type CODE, but this can
# happen for example if the input line was a single semicolon
# which is being deleted. In that case there was code in the
# input file but it is not being retained. So we can silently
# return.
return $rhash_of_desires;
}
my $level = $rLL->[$K_first]->[_LEVEL_];
my $type = $rLL->[$K_first]->[_TYPE_];
my $token = $rLL->[$K_first]->[_TOKEN_];
my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
# End a group 'badly' at an unexpected level. This will prevent
# blank lines being incorrectly placed after the end of the group.
# We are looking for any deviation from two acceptable patterns:
# PATTERN 1: a simple list; secondary lines are at level+1
# PATTERN 2: a long statement; all secondary lines same level
# This was added as a fix for case b1177, in which a complex
# structure got incorrectly inserted blank lines.
if ( $ibeg >= 0 ) {
# Check for deviation from PATTERN 1, simple list:
if ( defined($K_closing) && $K_first < $K_closing ) {
$self->kgb_end_group(1) if ( $level != $level_beg + 1 );
}
# Check for deviation from PATTERN 2, single statement:
elsif ( $level != $level_beg ) { $self->kgb_end_group(1) }
else {
## no deviation
}
}
# Do not look for keywords in lists ( keyword 'my' can occur in
# lists, see case b760); fixed for c048.
# Switch from ->is_list_by_K to !->is_in_block_by_K to fix b1464
if ( !$self->is_in_block_by_K($K_first) ) {
if ( $ibeg >= 0 ) { $iend = $i }
next;
}
# see if this is a code type we seek (i.e. comment)
if ( $CODE_type
&& $keyword_group_list_comment_pattern
&& $CODE_type =~ /$keyword_group_list_comment_pattern/ )
{
my $tok = $CODE_type;
# Continuing a group
if ( $ibeg >= 0 && $level == $level_beg ) {
$self->kgb_add_to_group( $i, $tok, $level );
}
# Start new group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
$self->kgb_add_to_group( $i, $tok, $level );
}
next;
}
# See if it is a keyword we seek, but never start a group in a
# continuation line; the code may be badly formatted.
if ( $ci_level == 0
&& $type eq 'k'
&& $token =~ /$keyword_group_list_pattern/ )
{
# Continuing a keyword group
if ( $ibeg >= 0 && $level == $level_beg ) {
$self->kgb_add_to_group( $i, $token, $level );
}
# Start new keyword group
else {
# first end old group if any; we might be starting new
# keywords at different level
if ( $ibeg >= 0 ) { $self->kgb_end_group(); }
$self->kgb_add_to_group( $i, $token, $level );
}
next;
}
# This is not one of our keywords, but we are in a keyword group
# so see if we should continue or quit
elsif ( $ibeg >= 0 ) {
# - bail out on a large level change; we may have walked into a
# data structure or anonymous sub code.
if ( $level > $level_beg + 1 || $level < $level_beg ) {
$self->kgb_end_group(1);
next;
}
# - keep going on a continuation line of the same level, since
# it is probably a continuation of our previous keyword,
# - and keep going past hanging side comments because we never
# want to interrupt them.
if ( ( ( $level == $level_beg ) && $ci_level > 0 )
|| $CODE_type eq 'HSC' )
{
$iend = $i;
next;
}
# - continue if if we are within in a container which started
# with the line of the previous keyword.
if ( defined($K_closing) && $K_first <= $K_closing ) {
# continue if entire line is within container
if ( $K_last <= $K_closing ) { $iend = $i; next }
# continue at ); or }; or ];
my $KK = $K_closing + 1;
if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
if ( $KK < $K_last ) {
if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' )
{
$self->kgb_end_group(1);
next;
}
}
$iend = $i;
next;
}
$self->kgb_end_group(1);
next;
}
# - end the group if none of the above
$self->kgb_end_group();
next;
}
# not in a keyword group; continue
else { next }
} ## end of loop over all lines
$self->kgb_end_group();
return $rhash_of_desires;
} ## end sub keyword_group_scan
} ## end closure keyword_group_scan
#######################################
# CODE SECTION 7: Process lines of code
#######################################
{ ## begin closure process_line_of_CODE
# The routines in this closure receive lines of code and combine them into
# 'batches' and send them along. A 'batch' is the unit of code which can be
# processed further as a unit. It has the property that it is the largest
# amount of code into which which perltidy is free to place one or more
# line breaks within it without violating any constraints.
# When a new batch is formed it is sent to sub 'grind_batch_of_code'.
# flags needed by the store routine
my $line_of_tokens;
my $no_internal_newlines;
my $CODE_type;
my $current_line_starts_in_quote;
# range of K of tokens for the current line
my ( $K_first, $K_last );
my ( $rLL, $radjusted_levels, $rparent_of_seqno, $rdepth_of_opening_seqno,
$rblock_type_of_seqno, $ri_starting_one_line_block );
# past stored nonblank tokens and flags
my (
$K_last_nonblank_code, $K_dangling_elsif,
$is_static_block_comment, $last_CODE_type,
$last_line_had_side_comment, $next_parent_seqno,
$next_slevel,
);
# Called once at the start of a new file
sub initialize_process_line_of_CODE {
$K_last_nonblank_code = undef;
$K_dangling_elsif = 0;
$is_static_block_comment = 0;
$last_line_had_side_comment = 0;
$next_parent_seqno = SEQ_ROOT;
$next_slevel = undef;
return;
} ## end sub initialize_process_line_of_CODE
# Batch variables: these describe the current batch of code being formed
# and sent down the pipeline. They are initialized in the next
# sub.
my (
$rbrace_follower, $index_start_one_line_block,
$starting_in_quote, $ending_in_quote,
);
# Called before the start of each new batch
sub initialize_batch_variables {
# Initialize array values for a new batch. Any changes here must be
# carefully coordinated with sub store_token_to_go.
$max_index_to_go = UNDEFINED_INDEX;
$summed_lengths_to_go[0] = 0;
$nesting_depth_to_go[0] = 0;
$ri_starting_one_line_block = [];
# Redefine some sparse arrays.
# It is more efficient to redefine these sparse arrays and rely on
# undef's instead of initializing to 0's. Testing showed that using
# @array=() is more efficient than $#array=-1
@old_breakpoint_to_go = ();
@forced_breakpoint_to_go = ();
@block_type_to_go = ();
@mate_index_to_go = ();
@type_sequence_to_go = ();
# NOTE: @nobreak_to_go is sparse and could be treated this way, but
# testing showed that there would be very little efficiency gain
# because an 'if' test must be added in store_token_to_go.
# The initialization code for the remaining batch arrays is as follows
# and can be activated for testing. But profiling shows that it is
# time-consuming to re-initialize the batch arrays and is not necessary
# because the maximum valid token, $max_index_to_go, is carefully
# controlled. This means however that it is not possible to do any
# type of filter or map operation directly on these arrays. And it is
# not possible to use negative indexes. As a precaution against program
# changes which might do this, sub pad_array_to_go adds some undefs at
# the end of the current batch of data.
## 0 && do { #<<<
## @nobreak_to_go = ();
## @token_lengths_to_go = ();
## @levels_to_go = ();
## @ci_levels_to_go = ();
## @tokens_to_go = ();
## @K_to_go = ();
## @types_to_go = ();
## @leading_spaces_to_go = ();
## @reduced_spaces_to_go = ();
## @inext_to_go = ();
## @parent_seqno_to_go = ();
## };
$rbrace_follower = undef;
$ending_in_quote = 0;
$index_start_one_line_block = undef;
# initialize forced breakpoint vars associated with each output batch
$forced_breakpoint_count = 0;
$index_max_forced_break = UNDEFINED_INDEX;
$forced_breakpoint_undo_count = 0;
return;
} ## end sub initialize_batch_variables
sub leading_spaces_to_go {
my ($ii) = @_;
# Return the number of indentation spaces for token at index $ii
# in the output stream
return 0 if ( $ii < 0 );
my $indentation = $leading_spaces_to_go[$ii];
return ref($indentation) ? $indentation->get_spaces() : $indentation;
} ## end sub leading_spaces_to_go
sub create_one_line_block {
# note that this updates a closure variable
$index_start_one_line_block = shift;
# Set index starting next one-line block
# Given:
# $index_start_one_line_block = starting index in _to_go array
# undef => end current one-line block
#
# call with no args to delete the current one-line block
return;
} ## end sub create_one_line_block
# Routine to place the current token into the output stream.
# Called once per output token.
use constant DEBUG_STORE => 0;
sub store_token_to_go {
my ( $self, $Ktoken_vars, $rtoken_vars ) = @_;
#-------------------------------------------------------
# Token storage utility for sub process_line_of_CODE.
# Add one token to the next batch of '_to_go' variables.
#-------------------------------------------------------
# Input parameters:
# $Ktoken_vars = the index K in the global token array
# $rtoken_vars = $rLL->[$Ktoken_vars] = the corresponding token values
# unless they are temporarily being overridden
#------------------------------------------------------------------
# NOTE: called once per token so coding efficiency is critical here.
# All changes need to be benchmarked with Devel::NYTProf.
#------------------------------------------------------------------
my (
$type,
$token,
$ci_level,
$level,
$seqno,
$length,
) = @{$rtoken_vars}[
_TYPE_,
_TOKEN_,
_CI_LEVEL_,
_LEVEL_,
_TYPE_SEQUENCE_,
_TOKEN_LENGTH_,
];
# Check for emergency flush...
# The K indexes in the batch must always be a continuous sequence of
# the global token array. The batch process programming assumes this.
# If storing this token would cause this relation to fail we must dump
# the current batch before storing the new token. It is extremely rare
# for this to happen. One known example is the following two-line
# snippet when run with parameters
# --noadd-newlines --space-terminal-semicolon:
# if ( $_ =~ /PENCIL/ ) { $pencil_flag= 1 } ; ;
# $yy=1;
if ( $max_index_to_go >= 0 ) {
if ( $Ktoken_vars != $K_to_go[$max_index_to_go] + 1 ) {
$self->flush_batch_of_CODE();
}
# Do not output consecutive blank tokens ... this should not
# happen, but it is worth checking. Later code can then make the
# simplifying assumption that blank tokens are not consecutive.
elsif ( $type eq 'b' && $types_to_go[$max_index_to_go] eq 'b' ) {
if (DEVEL_MODE) {
# if this happens, it is may be that consecutive blanks
# were inserted into the token stream in 'respace_tokens'
my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
Fault("consecutive blanks near line $lno; please fix");
}
return;
}
else {
## all ok
}
}
# Do not start a batch with a blank token.
# Fixes cases b149 b888 b984 b985 b986 b987
else {
if ( $type eq 'b' ) { return }
}
# Update counter and do initializations if first token of new batch
if ( !++$max_index_to_go ) {
# Reset flag '$starting_in_quote' for a new batch. It must be set
# to the value of '$in_continued_quote', but here for efficiency we
# set it to zero, which is its normal value. Then in coding below
# we will change it if we find we are actually in a continued quote.
$starting_in_quote = 0;
# Update the next parent sequence number for each new batch.
#----------------------------------------
# Begin coding from sub parent_seqno_by_K
#----------------------------------------
# The following is equivalent to this call but much faster:
# $next_parent_seqno = $self->parent_seqno_by_K($Ktoken_vars);
$next_parent_seqno = SEQ_ROOT;
if ($seqno) {
$next_parent_seqno = $rparent_of_seqno->{$seqno};
}
else {
my $Kt = $self->[_rK_next_seqno_by_K_]->[$Ktoken_vars];
if ( defined($Kt) ) {
# if next container token is closing, it is the parent seqno
if ( $is_closing_type{ $rLL->[$Kt]->[_TYPE_] } ) {
$next_parent_seqno = $rLL->[$Kt]->[_TYPE_SEQUENCE_];
}
# otherwise we want its parent container
else {
$next_parent_seqno =
$rparent_of_seqno->{ $rLL->[$Kt]->[_TYPE_SEQUENCE_] };
}
}
}
$next_parent_seqno = SEQ_ROOT
if ( !defined($next_parent_seqno) );
#--------------------------------------
# End coding from sub parent_seqno_by_K
#--------------------------------------
$next_slevel = $rdepth_of_opening_seqno->[$next_parent_seqno] + 1;
}
# Safety check that length is defined. This is slow and should not be
# needed now, so just do it in DEVEL_MODE to check programming changes.
# Formerly needed for --indent-only, in which the entire set of tokens
# is normally turned into type 'q'. Lengths are now defined in sub
# 'respace_tokens' so this check is no longer needed.
if ( DEVEL_MODE && !defined($length) ) {
my $lno = $rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
$length = length($token);
Fault(<<EOM);
undefined length near line $lno; num chars=$length, token='$token'
EOM
}
#----------------------------
# add this token to the batch
#----------------------------
$K_to_go[$max_index_to_go] = $Ktoken_vars;
$types_to_go[$max_index_to_go] = $type;
$tokens_to_go[$max_index_to_go] = $token;
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$levels_to_go[$max_index_to_go] = $level;
$nobreak_to_go[$max_index_to_go] = $no_internal_newlines;
$token_lengths_to_go[$max_index_to_go] = $length;
# Skip point initialization for these sparse arrays - undef's okay;
# See also related code in sub initialize_batch_variables.
## $old_breakpoint_to_go[$max_index_to_go] = 0;
## $forced_breakpoint_to_go[$max_index_to_go] = 0;
## $block_type_to_go[$max_index_to_go] = EMPTY_STRING;
## $type_sequence_to_go[$max_index_to_go] = $seqno;
# NOTE: nobreak_to_go can be treated as a sparse array, but testing
# showed that there is almost no efficiency gain because an if test
# would need to be added.
# We keep a running sum of token lengths from the start of this batch:
# summed_lengths_to_go[$i] = total length to just before token $i
# summed_lengths_to_go[$i+1] = total length to just after token $i
$summed_lengths_to_go[ $max_index_to_go + 1 ] =
$summed_lengths_to_go[$max_index_to_go] + $length;
# Initialize some sequence-dependent variables to their normal values
$parent_seqno_to_go[$max_index_to_go] = $next_parent_seqno;
$nesting_depth_to_go[$max_index_to_go] = $next_slevel;
# Then fix them at container tokens:
if ($seqno) {
$type_sequence_to_go[$max_index_to_go] = $seqno;
$block_type_to_go[$max_index_to_go] =
$rblock_type_of_seqno->{$seqno};
if ( $is_opening_token{$token} ) {
my $slevel = $rdepth_of_opening_seqno->[$seqno];
$nesting_depth_to_go[$max_index_to_go] = $slevel;
$next_slevel = $slevel + 1;
$next_parent_seqno = $seqno;
}
elsif ( $is_closing_token{$token} ) {
$next_slevel = $rdepth_of_opening_seqno->[$seqno];
my $slevel = $next_slevel + 1;
$nesting_depth_to_go[$max_index_to_go] = $slevel;
my $parent_seqno = $rparent_of_seqno->{$seqno};
$parent_seqno = SEQ_ROOT unless defined($parent_seqno);
$parent_seqno_to_go[$max_index_to_go] = $parent_seqno;
$next_parent_seqno = $parent_seqno;
}
else {
# ternary token: nothing to do
}
}
# Define the indentation that this token will have in two cases:
# Without CI = reduced_spaces_to_go
# With CI = leading_spaces_to_go
$leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces_to_go[$max_index_to_go] =
$rOpts_indent_columns * $radjusted_levels->[$Ktoken_vars];
if ($ci_level) {
$leading_spaces_to_go[$max_index_to_go] +=
$rOpts_continuation_indentation;
}
# Correct these values if we are starting in a continued quote
if ( $current_line_starts_in_quote
&& $Ktoken_vars == $K_first )
{
# in a continued quote - correct value set above if first token
if ( $max_index_to_go == 0 ) { $starting_in_quote = 1 }
$leading_spaces_to_go[$max_index_to_go] = 0;
$reduced_spaces_to_go[$max_index_to_go] = 0;
}
DEBUG_STORE && do {
my ( $pkg, $file_uu, $lno ) = caller();
print {*STDOUT}
"STORE: from $pkg $lno: storing token $token type $type lev=$level at $max_index_to_go\n";
};
return;
} ## end sub store_token_to_go
sub flush_batch_of_CODE {
my ($self) = @_;
# Finish and process the current batch.
# This must be the only call to grind_batch_of_CODE()
return if ( $max_index_to_go < 0 );
# Create an array to hold variables for this batch
my $this_batch = $self->[_this_batch_] = [];
$this_batch->[_starting_in_quote_] = 1 if ($starting_in_quote);
$this_batch->[_ending_in_quote_] = 1 if ($ending_in_quote);
if ( $CODE_type || $last_CODE_type ) {
$this_batch->[_batch_CODE_type_] =
$K_to_go[$max_index_to_go] >= $K_first
? $CODE_type
: $last_CODE_type;
}
$last_line_had_side_comment =
( $max_index_to_go > 0 && $types_to_go[$max_index_to_go] eq '#' );
# The flag $is_static_block_comment applies to the line which just
# arrived. So it only applies if we are outputting that line.
if ( $is_static_block_comment && !$last_line_had_side_comment ) {
$this_batch->[_is_static_block_comment_] = $K_to_go[0] == $K_first;
}
$this_batch->[_ri_starting_one_line_block_] =
$ri_starting_one_line_block;
#-------------------
# process this batch
#-------------------
$self->grind_batch_of_CODE();
# Done .. this batch is history
initialize_batch_variables();
return;
} ## end sub flush_batch_of_CODE
sub end_batch {
# End the current batch, EXCEPT for a few special cases
my ($self) = @_;
if ( $max_index_to_go < 0 ) {
# nothing to do .. this is harmless but wastes time.
if (DEVEL_MODE) {
Fault("sub end_batch called with nothing to do; please fix\n");
}
return;
}
# Exceptions when a line does not end with a comment... (fixes c058)
if ( $types_to_go[$max_index_to_go] ne '#' ) {
# Exception 1: Do not end line in a weld
return
if ( $total_weld_count
&& $self->[_rK_weld_right_]->{ $K_to_go[$max_index_to_go] } );
# Exception 2: just set a tentative breakpoint if we might be in a
# one-line block
if ( defined($index_start_one_line_block) ) {
$self->set_forced_breakpoint($max_index_to_go);
return;
}
}
$self->flush_batch_of_CODE();
return;
} ## end sub end_batch
sub flush_vertical_aligner {
my ($self) = @_;
my $vao = $self->[_vertical_aligner_object_];
$vao->flush();
return;
} ## end sub flush_vertical_aligner
sub flush {
my ( $self, ($CODE_type_flush) ) = @_;
# Sub flush is called to output any tokens in the pipeline, so that
# an alternate source of lines can be written in the correct order
# Optional parameter:
# $CODE_type_flush = 'BL' for flushing to insert a blank line
$index_start_one_line_block = undef;
# End the current batch, if it holds any tokens, with 1 exception
if ( $max_index_to_go >= 0 ) {
# Exception: if we are flushing within the code stream only to
# insert blank line(s), then we can keep the batch intact at a
# weld. This improves formatting of -ce. See test 'ce1.ce'
if ( $CODE_type_flush && $CODE_type_flush eq 'BL' ) {
$self->end_batch();
}
# otherwise, we have to shut things down completely.
else { $self->flush_batch_of_CODE() }
}
$self->flush_vertical_aligner();
return;
} ## end sub flush
my %is_assignment_or_fat_comma;
BEGIN {
%is_assignment_or_fat_comma = %is_assignment;
$is_assignment_or_fat_comma{'=>'} = 1;
}
sub add_missing_else {
my ($self) = @_;
# Add a missing 'else' block.
# $K_dangling_elsif = index of closing elsif brace not followed by else
# Make sure everything looks okay
if ( !$K_dangling_elsif
|| $K_dangling_elsif < $K_first
|| $rLL->[$K_dangling_elsif]->[_TYPE_] ne '}' )
{
DEVEL_MODE && Fault("could not find closing elsif brace\n");
}
my $comment = $rOpts->{'add-missing-else-comment'};
# Safety check
if ( substr( $comment, 0, 1 ) ne '#' ) { $comment = '#' . $comment }
# Calculate indentation
my $level = $radjusted_levels->[$K_dangling_elsif];
my $spaces = SPACE x ( $level * $rOpts_indent_columns );
my $line1 = $spaces . "else {\n";
my $line3 = $spaces . "}\n";
$spaces .= SPACE x $rOpts_indent_columns;
my $line2 = $spaces . $comment . "\n";
# clear the output pipeline
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line($line1);
$file_writer_object->write_code_line($line2);
$file_writer_object->write_code_line($line3);
return;
} ## end sub add_missing_else
sub process_line_of_CODE {
my ( $self, $my_line_of_tokens ) = @_;
#----------------------------------------------------------------
# This routine is called once per INPUT line to format all of the
# tokens on that line.
#----------------------------------------------------------------
# It outputs full-line comments and blank lines immediately.
# For lines of code:
# - Tokens are copied one-by-one from the global token
# array $rLL to a set of '_to_go' arrays which collect batches of
# tokens. This is done with calls to 'store_token_to_go'.
# - A batch is closed and processed upon reaching a well defined
# structural break point (i.e. code block boundary) or forced
# breakpoint (i.e. side comment or special user controls).
# - Subsequent stages of formatting make additional line breaks
# appropriate for lists and logical structures, and as necessary to
# keep line lengths below the requested maximum line length.
#-----------------------------------
# begin initialize closure variables
#-----------------------------------
$line_of_tokens = $my_line_of_tokens;
my $rK_range = $line_of_tokens->{_rK_range};
if ( !defined( $rK_range->[0] ) ) {
# Empty line: This can happen if tokens are deleted, for example
# with the -mangle parameter
return;
}
( $K_first, $K_last ) = @{$rK_range};
$last_CODE_type = $CODE_type;
$CODE_type = $line_of_tokens->{_code_type};
$current_line_starts_in_quote = $line_of_tokens->{_starting_in_quote};
$rLL = $self->[_rLL_];
$radjusted_levels = $self->[_radjusted_levels_];
$rparent_of_seqno = $self->[_rparent_of_seqno_];
$rdepth_of_opening_seqno = $self->[_rdepth_of_opening_seqno_];
$rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
#---------------------------------
# end initialize closure variables
#---------------------------------
# This flag will become nobreak_to_go and should be set to 2 to prevent
# a line break AFTER the current token.
$no_internal_newlines = 0;
if ( !$rOpts_add_newlines || $CODE_type eq 'NIN' ) {
$no_internal_newlines = 2;
}
my $input_line = $line_of_tokens->{_line_text};
my ( $is_block_comment, $has_side_comment );
if ( $rLL->[$K_last]->[_TYPE_] eq '#' ) {
if ( $K_last == $K_first && $CODE_type ne 'HSC' ) {
$is_block_comment = 1;
}
else { $has_side_comment = 1 }
}
my $is_static_block_comment_without_leading_space =
$CODE_type eq 'SBCX';
$is_static_block_comment =
$CODE_type eq 'SBC' || $is_static_block_comment_without_leading_space;
# check for a $VERSION statement
if ( $CODE_type eq 'VER' ) {
$self->[_saw_VERSION_in_this_file_] = 1;
$no_internal_newlines = 2;
}
# Add interline blank if any
my $last_old_nonblank_type = "b";
my $first_new_nonblank_token = EMPTY_STRING;
my $K_first_true = $K_first;
if ( $max_index_to_go >= 0 ) {
$last_old_nonblank_type = $types_to_go[$max_index_to_go];
$first_new_nonblank_token = $rLL->[$K_first]->[_TOKEN_];
if ( !$is_block_comment
&& $types_to_go[$max_index_to_go] ne 'b'
&& $K_first > 0
&& $rLL->[ $K_first - 1 ]->[_TYPE_] eq 'b' )
{
$K_first -= 1;
}
}
my $rtok_first = $rLL->[$K_first];
my $in_quote = $line_of_tokens->{_ending_in_quote};
$ending_in_quote = $in_quote;
#------------------------------------
# Handle a block (full-line) comment.
#------------------------------------
if ($is_block_comment) {
if ( $rOpts->{'delete-block-comments'} ) {
$self->flush();
return;
}
$index_start_one_line_block = undef;
$self->end_batch() if ( $max_index_to_go >= 0 );
# output a blank line before block comments
if (
# unless we follow a blank or comment line
$self->[_last_line_leading_type_] ne '#'
&& $self->[_last_line_leading_type_] ne 'b'
# only if allowed
&& $rOpts->{'blanks-before-comments'}
# if this is NOT an empty comment, unless it follows a side
# comment and could become a hanging side comment.
&& (
$rtok_first->[_TOKEN_] ne '#'
|| ( $last_line_had_side_comment
&& $rLL->[$K_first]->[_LEVEL_] > 0 )
)
# not after a short line ending in an opening token
# because we already have space above this comment.
# Note that the first comment in this if block, after
# the 'if (', does not get a blank line because of this.
&& !$self->[_last_output_short_opening_token_]
# never before static block comments
&& !$is_static_block_comment
)
{
$self->flush(); # switching to new output stream
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_blank_code_line();
$self->[_last_line_leading_type_] = 'b';
}
if (
$rOpts->{'indent-block-comments'}
&& ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
{
my $Ktoken_vars = $K_first;
my $rtoken_vars = $rLL->[$Ktoken_vars];
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
}
else {
# switching to new output stream
$self->flush();
# Note that last arg in call here is 'undef' for comments
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line(
$rtok_first->[_TOKEN_] . "\n", undef );
$self->[_last_line_leading_type_] = '#';
}
return;
}
#--------------------------------------------
# Compare input/output indentation in logfile
#--------------------------------------------
if ( $self->[_save_logfile_] ) {
my $guessed_indentation_level =
$line_of_tokens->{_guessed_indentation_level};
# Compare input/output indentation except for:
# - hanging side comments
# - continuation lines (have unknown leading blank space)
# - and lines which are quotes (they may have been outdented)
my $exception =
$CODE_type eq 'HSC'
|| $rtok_first->[_CI_LEVEL_] > 0
|| $guessed_indentation_level == 0
&& $rtok_first->[_TYPE_] eq 'Q';
if ( !$exception ) {
my $input_line_number = $line_of_tokens->{_line_number};
$self->compare_indentation_levels( $K_first,
$guessed_indentation_level, $input_line_number );
}
}
#-----------------------------------------
# Handle a line marked as indentation-only
#-----------------------------------------
if ( $CODE_type eq 'IO' ) {
$self->flush();
my $line = $input_line;
# Fix for rt #125506 Unexpected string formatting
# in which leading space of a terminal quote was removed
$line =~ s/\s+$//;
$line =~ s/^\s+// unless ( $line_of_tokens->{_starting_in_quote} );
my $Ktoken_vars = $K_first;
# We work with a copy of the token variables and change the
# first token to be the entire line as a quote variable
my $rtoken_vars = $rLL->[$Ktoken_vars];
$rtoken_vars = copy_token_as_type( $rtoken_vars, 'q', $line );
# Patch: length is not really important here but must be defined
$rtoken_vars->[_TOKEN_LENGTH_] = length($line);
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch();
return;
}
#---------------------------
# Handle all other lines ...
#---------------------------
$K_dangling_elsif = 0;
# This is a good place to kill incomplete one-line blocks
if ( $max_index_to_go >= 0 ) {
# For -iob and -lp, mark essential old breakpoints.
# Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
# See related code below.
if ( $rOpts_ignore_old_breakpoints && $rOpts_line_up_parentheses ) {
my $type_first = $rLL->[$K_first_true]->[_TYPE_];
if ( $is_assignment_or_fat_comma{$type_first} ) {
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
}
if (
# this check needed -mangle (for example rt125012)
(
( !$index_start_one_line_block )
&& ( $last_old_nonblank_type eq ';' )
&& ( $first_new_nonblank_token ne '}' )
)
# Patch for RT #98902. Honor request to break at old commas.
|| ( $rOpts_break_at_old_comma_breakpoints
&& $last_old_nonblank_type eq ',' )
)
{
$forced_breakpoint_to_go[$max_index_to_go] = 1
if ($rOpts_break_at_old_comma_breakpoints);
$index_start_one_line_block = undef;
$self->end_batch();
}
# Keep any requested breaks before this line. Note that we have to
# use the original K_first because it may have been reduced above
# to add a blank. The value of the flag is as follows:
# 1 => hard break, flush the batch
# 2 => soft break, set breakpoint and continue building the batch
# added check on max_index_to_go for c177
if ( $max_index_to_go >= 0
&& $self->[_rbreak_before_Kfirst_]->{$K_first_true} )
{
$index_start_one_line_block = undef;
if ( $self->[_rbreak_before_Kfirst_]->{$K_first_true} == 2 ) {
$self->set_forced_breakpoint($max_index_to_go);
}
else {
$self->end_batch();
}
}
}
#--------------------------------------
# loop to process the tokens one-by-one
#--------------------------------------
$self->process_line_inner_loop($has_side_comment);
# if there is anything left in the output buffer ...
if ( $max_index_to_go >= 0 ) {
my $type = $rLL->[$K_last]->[_TYPE_];
my $break_flag = $self->[_rbreak_after_Klast_]->{$K_last};
# we have to flush ..
if (
# if there is a side comment...
$type eq '#'
# if this line ends in a quote
# NOTE: This is critically important for insuring that quoted
# lines do not get processed by things like -sot and -sct
|| $in_quote
# if this is a VERSION statement
|| $CODE_type eq 'VER'
# to keep a label at the end of a line
|| ( $type eq 'J' && $rOpts_break_after_labels != 2 )
# if we have a hard break request
|| $break_flag && $break_flag != 2
# if we are instructed to keep all old line breaks
|| !$rOpts->{'delete-old-newlines'}
# if this is a line of the form 'use overload'. A break here in
# the input file is a good break because it will allow the
# operators which follow to be formatted well. Without this
# break the formatting with -ci=4 -xci is poor, for example.
# use overload
# '+' => sub {
# print length $_[2], "\n";
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x + $y );
# },
# '-' => sub {
# my ( $x, $y ) = _order(@_);
# Number::Roman->new( int $x - $y );
# };
|| ( $max_index_to_go == 2
&& $types_to_go[0] eq 'k'
&& $tokens_to_go[0] eq 'use'
&& $tokens_to_go[$max_index_to_go] eq 'overload' )
)
{
$index_start_one_line_block = undef;
$self->end_batch();
}
else {
# Check for a soft break request
if ( $break_flag && $break_flag == 2 ) {
$self->set_forced_breakpoint($max_index_to_go);
}
# mark old line breakpoints in current output stream
if (
!$rOpts_ignore_old_breakpoints
# Mark essential old breakpoints if combination -iob -lp is
# used. These two options do not work well together, but
# we can avoid turning -iob off by ignoring -iob at certain
# essential line breaks. See also related code above.
# Fixes b1021 b1023 b1034 b1048 b1049 b1050 b1056 b1058
|| ( $rOpts_line_up_parentheses
&& $is_assignment_or_fat_comma{$type} )
)
{
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
}
}
if ( $K_dangling_elsif && $rOpts_add_missing_else ) {
$self->add_missing_else();
}
return;
} ## end sub process_line_of_CODE
sub process_line_inner_loop {
my ( $self, $has_side_comment ) = @_;
#--------------------------------------------------------------------
# Loop to move all tokens from one input line to a newly forming batch
#--------------------------------------------------------------------
# Do not start a new batch with a blank space
if ( $max_index_to_go < 0 && $rLL->[$K_first]->[_TYPE_] eq 'b' ) {
$K_first++;
}
foreach my $Ktoken_vars ( $K_first .. $K_last ) {
my $rtoken_vars = $rLL->[$Ktoken_vars];
#--------------
# handle blanks
#--------------
if ( $rtoken_vars->[_TYPE_] eq 'b' ) {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
#------------------
# handle non-blanks
#------------------
my $type = $rtoken_vars->[_TYPE_];
# If we are continuing after seeing a right curly brace, flush
# buffer unless we see what we are looking for, as in
# } else ...
if ($rbrace_follower) {
my $token = $rtoken_vars->[_TOKEN_];
if ( !$rbrace_follower->{$token} ) {
$self->end_batch() if ( $max_index_to_go >= 0 );
}
$rbrace_follower = undef;
}
my (
$block_type, $type_sequence,
$is_opening_BLOCK, $is_closing_BLOCK,
$nobreak_BEFORE_BLOCK
);
if ( $rtoken_vars->[_TYPE_SEQUENCE_] ) {
my $token = $rtoken_vars->[_TOKEN_];
$type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
$block_type = $rblock_type_of_seqno->{$type_sequence};
if ( $block_type
&& $token eq $type
&& $block_type ne 't'
&& !$self->[_rshort_nested_]->{$type_sequence} )
{
if ( $type eq '{' ) {
$is_opening_BLOCK = 1;
$nobreak_BEFORE_BLOCK = $no_internal_newlines;
}
elsif ( $type eq '}' ) {
$is_closing_BLOCK = 1;
$nobreak_BEFORE_BLOCK = $no_internal_newlines;
}
else {
## error - block should be enclosed by curly brace
DEVEL_MODE && Fault(<<EOM);
block type '$block_type' has unexpected container type '$type'
EOM
}
}
}
#---------------------
# handle side comments
#---------------------
if ($has_side_comment) {
# if at last token ...
if ( $Ktoken_vars == $K_last ) {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
next;
}
# if before last token ... do not allow breaks which would
# promote a side comment to a block comment
if ( $Ktoken_vars == $K_last - 1
|| $Ktoken_vars == $K_last - 2
&& $rLL->[ $K_last - 1 ]->[_TYPE_] eq 'b' )
{
$no_internal_newlines = 2;
}
}
# Process non-blank and non-comment tokens ...
#-----------------
# handle semicolon
#-----------------
if ( $type eq ';' ) {
my $next_nonblank_token_type = 'b';
my $next_nonblank_token = EMPTY_STRING;
if ( $Ktoken_vars < $K_last ) {
my $Knnb = $Ktoken_vars + 1;
$Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
$next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
$next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
}
if ( $rOpts_break_at_old_semicolon_breakpoints
&& ( $Ktoken_vars == $K_first )
&& $max_index_to_go >= 0
&& !defined($index_start_one_line_block) )
{
$self->end_batch();
}
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
$self->end_batch()
if (
!$no_internal_newlines
&& ( !$rOpts_keep_interior_semicolons
|| $Ktoken_vars >= $K_last )
&& ( $next_nonblank_token ne '}' )
);
}
#-----------
# handle '{'
#-----------
elsif ($is_opening_BLOCK) {
# Tentatively output this token. This is required before
# calling starting_one_line_block. We may have to unstore
# it, though, if we have to break before it.
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# Look ahead to see if we might form a one-line block..
my $too_long =
$self->starting_one_line_block( $Ktoken_vars,
$K_last_nonblank_code, $K_last );
$self->clear_breakpoint_undo_stack();
# to simplify the logic below, set a flag to indicate if
# this opening brace is far from the keyword which introduces it
my $keyword_on_same_line = 1;
if (
$max_index_to_go >= 0
&& defined($K_last_nonblank_code)
&& $rLL->[$K_last_nonblank_code]->[_TYPE_] eq ')'
&& ( ( $rtoken_vars->[_LEVEL_] < $levels_to_go[0] )
|| $too_long )
)
{
$keyword_on_same_line = 0;
}
# Break before '{' if requested with -bl or -bli flag
my $want_break = $self->[_rbrace_left_]->{$type_sequence};
# But do not break if this token is welded to the left
if ( $total_weld_count
&& defined( $self->[_rK_weld_left_]->{$Ktoken_vars} ) )
{
$want_break = 0;
}
# Break BEFORE an opening '{' ...
if (
# if requested
$want_break
# and we were unable to start looking for a block,
&& !defined($index_start_one_line_block)
# or if it will not be on same line as its keyword, so that
# it will be outdented (eval.t, overload.t), and the user
# has not insisted on keeping it on the right
|| ( !$keyword_on_same_line
&& !$rOpts_opening_brace_always_on_right )
)
{
# but only if allowed
if ( !$nobreak_BEFORE_BLOCK ) {
# since we already stored this token, we must unstore it
$self->unstore_token_to_go();
# then output the line
$self->end_batch() if ( $max_index_to_go >= 0 );
# and now store this token at the start of a new line
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
}
# now output this line
$self->end_batch()
if ( $max_index_to_go >= 0 && !$no_internal_newlines );
}
#-----------
# handle '}'
#-----------
elsif ($is_closing_BLOCK) {
my $next_nonblank_token_type = 'b';
my $next_nonblank_token = EMPTY_STRING;
my $Knnb;
if ( $Ktoken_vars < $K_last ) {
$Knnb = $Ktoken_vars + 1;
$Knnb++ if ( $rLL->[$Knnb]->[_TYPE_] eq 'b' );
$next_nonblank_token = $rLL->[$Knnb]->[_TOKEN_];
$next_nonblank_token_type = $rLL->[$Knnb]->[_TYPE_];
}
# If there is a pending one-line block ..
if ( defined($index_start_one_line_block) ) {
# Fix for b1208: if a side comment follows this closing
# brace then we must include its length in the length test
# ... unless the -issl flag is set (fixes b1307-1309).
# Assume a minimum of 1 blank space to the comment.
my $added_length = 0;
if ( $has_side_comment
&& !$rOpts_ignore_side_comment_lengths
&& $next_nonblank_token_type eq '#' )
{
$added_length = 1 + $rLL->[$K_last]->[_TOKEN_LENGTH_];
}
# we have to terminate it if..
if (
# it is too long (final length may be different from
# initial estimate). note: must allow 1 space for this
# token
$self->excess_line_length( $index_start_one_line_block,
$max_index_to_go ) + $added_length >= 0
)
{
$index_start_one_line_block = undef;
}
}
# put a break before this closing curly brace if appropriate
$self->end_batch()
if ( $max_index_to_go >= 0
&& !$nobreak_BEFORE_BLOCK
&& !defined($index_start_one_line_block) );
# store the closing curly brace
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# ok, we just stored a closing curly brace. Often, but
# not always, we want to end the line immediately.
# So now we have to check for special cases.
# if this '}' successfully ends a one-line block..
my $one_line_block_type = EMPTY_STRING;
my $keep_going;
if ( defined($index_start_one_line_block) ) {
# Remember the type of token just before the
# opening brace. It would be more general to use
# a stack, but this will work for one-line blocks.
# c1461 fix
my $Ko = $self->[_K_opening_container_]->{$type_sequence};
my $Kom = $self->K_previous_nonblank($Ko);
if ( defined($Kom) ) {
$one_line_block_type = $rLL->[$Kom]->[_TYPE_];
}
# we have to actually make it by removing tentative
# breaks that were set within it
$self->undo_forced_breakpoint_stack(0);
# For -lp, extend the nobreak to include a trailing
# terminal ','. This is because the -lp indentation was
# not known when making one-line blocks, so we may be able
# to move the line back to fit. Otherwise we may create a
# needlessly stranded comma on the next line.
my $iend_nobreak = $max_index_to_go - 1;
if ( $rOpts_line_up_parentheses
&& $next_nonblank_token_type eq ','
&& $Knnb eq $K_last )
{
my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
my $is_excluded =
$self->[_ris_excluded_lp_container_]->{$p_seqno};
$iend_nobreak = $max_index_to_go if ( !$is_excluded );
}
$self->set_nobreaks( $index_start_one_line_block,
$iend_nobreak );
# save starting block indexes so that sub correct_lp can
# check and adjust -lp indentation (c098)
push @{$ri_starting_one_line_block},
$index_start_one_line_block;
# then re-initialize for the next one-line block
$index_start_one_line_block = undef;
# then decide if we want to break after the '}' ..
# We will keep going to allow certain brace followers as in:
# do { $ifclosed = 1; last } unless $losing;
#
# But make a line break if the curly ends a
# significant block:
if (
(
$is_block_without_semicolon{$block_type}
# Follow users break point for
# one line block types U & G, such as a 'try' block
|| $one_line_block_type =~ /^[UG]$/
&& $Ktoken_vars == $K_last
)
# if needless semicolon follows we handle it later
&& $next_nonblank_token ne ';'
)
{
$self->end_batch()
unless ($no_internal_newlines);
}
}
# set string indicating what we need to look for brace follower
# tokens
if ( $is_if_unless_elsif_else{$block_type} ) {
$rbrace_follower = undef;
}
elsif ( $block_type eq 'do' ) {
$rbrace_follower = \%is_do_follower;
if (
$self->tight_paren_follows( $K_to_go[0], $Ktoken_vars )
)
{
$rbrace_follower = { ')' => 1 };
}
}
# added eval for borris.t
elsif ($is_sort_map_grep_eval{$block_type}
|| $one_line_block_type eq 'G' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
# anonymous sub
elsif ( $self->[_ris_asub_block_]->{$type_sequence} ) {
if ($one_line_block_type) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
# Exceptions to help keep -lp intact, see git #74 ...
# Exception 1: followed by '}' on this line
if ( $Ktoken_vars < $K_last
&& $next_nonblank_token eq '}' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
# Exception 2: followed by '}' on next line if -lp set.
# The -lp requirement allows the formatting to follow
# old breaks when -lp is not used, minimizing changes.
# Fixes issue c087.
elsif ($Ktoken_vars == $K_last
&& $rOpts_line_up_parentheses )
{
my $K_closing_container =
$self->[_K_closing_container_];
my $p_seqno = $parent_seqno_to_go[$max_index_to_go];
my $Kc = $K_closing_container->{$p_seqno};
my $is_excluded =
$self->[_ris_excluded_lp_container_]->{$p_seqno};
$keep_going =
( defined($Kc)
&& $rLL->[$Kc]->[_TOKEN_] eq '}'
&& !$is_excluded
&& $Kc - $Ktoken_vars <= 2 );
$rbrace_follower = undef if ($keep_going);
}
else {
## not an exception
}
}
else {
$rbrace_follower = \%is_anon_sub_brace_follower;
}
}
# None of the above: specify what can follow a closing
# brace of a block which is not an
# if/elsif/else/do/sort/map/grep/eval
# Testfiles:
# 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
else {
$rbrace_follower = \%is_other_brace_follower;
}
# See if an elsif block is followed by another elsif or else;
# complain if not.
if ( $block_type eq 'elsif' ) {
# more code on this line ? ( this is unusual )
if ( $next_nonblank_token_type ne 'b'
&& $next_nonblank_token_type ne '#' )
{
# check for 'elsif' or 'else'
if ( !$is_elsif_else{$next_nonblank_token} ) {
write_logfile_entry("(No else block)\n");
# Note that we cannot add a missing else block
# in this case because more code follows the
# closing elsif brace on the same line.
if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
my $lno =
$rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
warning("$lno: No else block\n");
}
}
}
# no more code on this line, so check on next line
else {
my $K_next = $self->K_next_code($K_last);
if ( !defined($K_next)
|| $rLL->[$K_next]->[_TYPE_] ne 'k'
|| !$is_elsif_else{ $rLL->[$K_next]->[_TOKEN_] } )
{
$K_dangling_elsif = $Ktoken_vars;
write_logfile_entry("(No else block)\n");
if ( $rOpts_warn_missing_else && !DEVEL_MODE ) {
my $lno =
$rLL->[$Ktoken_vars]->[_LINE_INDEX_] + 1;
if ($rOpts_add_missing_else) {
warning(
"$lno: Adding missing else block\n");
}
else {
warning(
"$lno: No else block (use -ame to add one)\n"
);
}
}
}
}
}
# keep going after certain block types (map,sort,grep,eval)
# added eval for borris.t
if ($keep_going) {
# keep going
$rbrace_follower = undef;
}
# if no more tokens, postpone decision until re-entering
elsif ( ( $next_nonblank_token_type eq 'b' )
&& $rOpts_add_newlines )
{
if ( !$rbrace_follower ) {
$self->end_batch()
if (!$no_internal_newlines
&& $max_index_to_go >= 0 );
}
}
elsif ($rbrace_follower) {
if ( $rbrace_follower->{$next_nonblank_token} ) {
# Fix for b1385: keep break after a comma following a
# 'do' block. This could also be used for other block
# types, but that would cause a significant change in
# existing formatting without much benefit.
if ( $next_nonblank_token_type eq ','
&& $Knnb eq $K_last
&& $block_type eq 'do'
&& $rOpts_add_newlines
&& $self->is_trailing_comma($Knnb) )
{
$self->[_rbreak_after_Klast_]->{$K_last} = 1;
}
}
else {
$self->end_batch()
if (!$no_internal_newlines
&& $max_index_to_go >= 0 );
}
$rbrace_follower = undef;
}
else {
$self->end_batch()
if ( !$no_internal_newlines && $max_index_to_go >= 0 );
}
} ## end treatment of closing block token
#------------------------------
# handle here_doc target string
#------------------------------
elsif ( $type eq 'h' ) {
# no newlines after seeing here-target
$no_internal_newlines = 2;
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
}
#-----------------------------
# handle all other token types
#-----------------------------
else {
$self->store_token_to_go( $Ktoken_vars, $rtoken_vars );
# break after a label if requested
if ( $rOpts_break_after_labels
&& $type eq 'J'
&& $rOpts_break_after_labels == 1 )
{
$self->end_batch()
unless ($no_internal_newlines);
}
}
# remember previous nonblank, non-comment OUTPUT token
$K_last_nonblank_code = $Ktoken_vars;
} ## end of loop over all tokens in this line
return;
} ## end sub process_line_inner_loop
} ## end closure process_line_of_CODE
sub tight_paren_follows {
my ( $self, $K_to_go_0, $K_ic ) = @_;
# Given:
# $K_to_go_0 = first token index K of this output batch (=K_to_go[0])
# $K_ic = index of the closing do brace (=K_to_go[$max_index_to_go])
# Return:
# false if we want a break after the closing do brace
# true if we do not want a break after the closing do brace
# We are at the closing brace of a 'do' block. See if this brace is
# followed by a closing paren, and if so, set a flag which indicates
# that we do not want a line break between the '}' and ')'.
# xxxxx ( ...... do { ... } ) {
# ^-------looking at this brace, K_ic
# Subscript notation:
# _i = inner container (braces in this case)
# _o = outer container (parens in this case)
# _io = inner opening = '{'
# _ic = inner closing = '}'
# _oo = outer opening = '('
# _oc = outer closing = ')'
# |--K_oo |--K_oc = outer container
# xxxxx ( ...... do { ...... } ) {
# |--K_io |--K_ic = inner container
# In general, the safe thing to do is return a 'false' value
# if the statement appears to be complex. This will have
# the downstream side-effect of opening up outer containers
# to help make complex code readable. But for simpler
# do blocks it can be preferable to keep the code compact
# by returning a 'true' value.
return unless defined($K_ic);
my $rLL = $self->[_rLL_];
# we should only be called at a closing block
my $seqno_i = $rLL->[$K_ic]->[_TYPE_SEQUENCE_];
return unless ($seqno_i); # shouldn't happen;
# This only applies if the next nonblank is a ')'
my $K_oc = $self->K_next_nonblank($K_ic);
return unless defined($K_oc);
my $token_next = $rLL->[$K_oc]->[_TOKEN_];
return unless ( $token_next eq ')' );
my $seqno_o = $rLL->[$K_oc]->[_TYPE_SEQUENCE_];
my $K_io = $self->[_K_opening_container_]->{$seqno_i};
my $K_oo = $self->[_K_opening_container_]->{$seqno_o};
return unless ( defined($K_io) && defined($K_oo) );
# RULE 1: Do not break before a closing signature paren
# (regardless of complexity). This is a fix for issue git#22.
# Looking for something like:
# sub xxx ( ... do { ... } ) {
# ^----- next block_type
my $K_test = $self->K_next_nonblank($K_oc);
if ( defined($K_test) && $rLL->[$K_test]->[_TYPE_] eq '{' ) {
my $seqno_test = $rLL->[$K_test]->[_TYPE_SEQUENCE_];
if ($seqno_test) {
if ( $self->[_ris_asub_block_]->{$seqno_test}
|| $self->[_ris_sub_block_]->{$seqno_test} )
{
return 1;
}
}
}
# RULE 2: Break if the contents within braces appears to be 'complex'. We
# base this decision on the number of tokens between braces.
# xxxxx ( ... do { ... } ) {
# ^^^^^^
# Although very simple, it has the advantages of (1) being insensitive to
# changes in lengths of identifier names, (2) easy to understand, implement
# and test. A test case for this is 't/snippets/long_line.in'.
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass Rule 2]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
# Example: $K_ic - $K_oo = 20 [Fail Rule 2]
# test_zero_args( "do-returned list slice", do { ( 10, 11 )[ 2, 3 ]; });
return if ( $K_ic - $K_io > 16 );
# RULE 3: break if the code between the opening '(' and the '{' is 'complex'
# As with the previous rule, we decide based on the token count
# xxxxx ( ... do { ... } ) {
# ^^^^^^^^
# Example: $K_ic - $K_oo = 9 [Pass Rule 2]
# $K_io - $K_oo = 4 [Pass Rule 3]
# if ( do { $2 !~ /&/ } ) { ... }
# Example: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 9 [Pass rule 3]
# for ( split /\s*={70,}\s*/, do { local $/; <DATA> }) { ... }
return if ( $K_io - $K_oo > 9 );
# RULE 4: Break if we have already broken this batch of output tokens
return if ( $K_oo < $K_to_go_0 );
# RULE 5: Break if input is not on one line
# For example, we will set the flag for the following expression
# written in one line:
# This has: $K_ic - $K_oo = 10 [Pass rule 2]
# $K_io - $K_oo = 8 [Pass rule 3]
# $self->debug( 'Error: ' . do { local $/; <$err> } );
# but we break after the brace if it is on multiple lines on input, since
# the user may prefer it on multiple lines:
# [Fail rule 5]
# $self->debug(
# 'Error: ' . do { local $/; <$err> }
# );
if ( !$rOpts_ignore_old_breakpoints ) {
my $iline_oo = $rLL->[$K_oo]->[_LINE_INDEX_];
my $iline_oc = $rLL->[$K_oc]->[_LINE_INDEX_];
return if ( $iline_oo != $iline_oc );
}
# OK to keep the paren tight
return 1;
} ## end sub tight_paren_follows
my %is_brace_semicolon_colon;
BEGIN {
my @q = qw( { } ; : );
@is_brace_semicolon_colon{@q} = (1) x scalar(@q);
}
sub starting_one_line_block {
my ( $self, $Kj, $K_last_nonblank, $K_last ) = @_;
# After seeing an opening curly brace, look for the closing brace and see
# if the entire block will fit on a line. This routine is not always right
# so a check is made later (at the closing brace) to make sure we really
# have a one-line block. We have to do this preliminary check, though,
# because otherwise we would always break at a semicolon within a one-line
# block if the block contains multiple statements.
# Given:
# $Kj = index of opening brace
# $K_last_nonblank = index of previous nonblank code token
# $K_last = index of last token of input line
# Calls 'create_one_line_block' if one-line block might be formed.
# Returns:
# $too_long:
# true = distance from opening keyword to OPENING brace exceeds
# the maximum line length.
# false otherwise
# Note that this flag is for distance from the statement start to the
# OPENING brace, not the closing brace.
my $rbreak_container = $self->[_rbreak_container_];
my $rshort_nested = $self->[_rshort_nested_];
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
# kill any current block - we can only go 1 deep
create_one_line_block(undef);
my $i_start = 0;
# This routine should not have been called if there are no tokens in the
# 'to_go' arrays of previously stored tokens. A previous call to
# 'store_token_to_go' should have stored an opening brace. An error here
# indicates that a programming change may have caused a flush operation to
# clean out the previously stored tokens.
if ( !defined($max_index_to_go) || $max_index_to_go < 0 ) {
Fault("program bug: store_token_to_go called incorrectly\n")
if (DEVEL_MODE);
return;
}
# Return if block should be broken
my $type_sequence_j = $rLL->[$Kj]->[_TYPE_SEQUENCE_];
if ( $rbreak_container->{$type_sequence_j} ) {
return;
}
my $ris_bli_container = $self->[_ris_bli_container_];
my $is_bli = $ris_bli_container->{$type_sequence_j};
my $block_type = $rblock_type_of_seqno->{$type_sequence_j};
$block_type = EMPTY_STRING unless ( defined($block_type) );
my $previous_nonblank_token = EMPTY_STRING;
my $i_last_nonblank = -1;
if ( defined($K_last_nonblank) ) {
$i_last_nonblank = $K_last_nonblank - $K_to_go[0];
if ( $i_last_nonblank >= 0 ) {
$previous_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
}
}
#---------------------------------------------------------------------
# find the starting keyword for this block (such as 'if', 'else', ...)
#---------------------------------------------------------------------
if (
$max_index_to_go == 0
##|| $block_type =~ /^[\{\}\;\:]$/
|| $is_brace_semicolon_colon{$block_type}
|| substr( $block_type, 0, 7 ) eq 'package'
)
{
$i_start = $max_index_to_go;
}
# the previous nonblank token should start these block types
elsif (
$i_last_nonblank >= 0
&& ( $previous_nonblank_token eq $block_type
|| $self->[_ris_asub_block_]->{$type_sequence_j}
|| $self->[_ris_sub_block_]->{$type_sequence_j}
|| substr( $block_type, -2, 2 ) eq '()' )
)
{
$i_start = $i_last_nonblank;
# For signatures and extended syntax ...
# If this brace follows a parenthesized list, we should look back to
# find the keyword before the opening paren because otherwise we might
# form a one line block which stays intact, and cause the parenthesized
# expression to break open. That looks bad.
if ( $tokens_to_go[$i_start] eq ')' ) {
# Find the opening paren
my $K_start = $K_to_go[$i_start];
return unless defined($K_start);
my $seqno = $type_sequence_to_go[$i_start];
return unless ($seqno);
my $K_opening = $K_opening_container->{$seqno};
return if ( !defined($K_opening) );
my $i_opening = $i_start + ( $K_opening - $K_start );
# give up if not on this line
return if ( $i_opening < 0 );
$i_start = $i_opening;
# go back one token before the opening paren
if ( $i_start > 0 ) { $i_start-- }
if ( $types_to_go[$i_start] eq 'b' && $i_start > 0 ) { $i_start--; }
my $lev = $levels_to_go[$i_start];
if ( $lev > $rLL->[$Kj]->[_LEVEL_] ) { return }
}
# include a length of any preceding assignment token if we break before
# it (b1461)
elsif ( $i_start > 0 ) {
my $i_eq = $i_start - 1;
if ( $types_to_go[$i_eq] eq 'b' && $i_eq > 0 ) { $i_eq--; }
my $type_eq = $types_to_go[$i_eq];
if ( $is_assignment{$type_eq} && $want_break_before{$type_eq} ) {
$i_start = $i_eq;
}
}
else {
# $i_start is 0 - cannot back up
}
}
elsif ( $previous_nonblank_token eq ')' ) {
# For something like "if (xxx) {", the keyword "if" will be
# just after the most recent break. This will be 0 unless
# we have just killed a one-line block and are starting another.
# (doif.t)
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
# Patch to avoid breaking short blocks defined with extended_syntax:
# Strip off any trailing () which was added in the parser to mark
# the opening keyword. For example, in the following
# create( TypeFoo $e) {$bubba}
# the blocktype would be marked as create()
my $stripped_block_type = $block_type;
if ( substr( $block_type, -2, 2 ) eq '()' ) {
$stripped_block_type = substr( $block_type, 0, -2 );
}
if ( $tokens_to_go[$i_start] ne $stripped_block_type ) {
return;
}
}
# patch for SWITCH/CASE to retain one-line case/when blocks
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
# Note: cannot use inext_index_to_go[] here because that array
# is still being constructed.
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
if ( $tokens_to_go[$i_start] ne $block_type ) {
return;
}
}
else {
#-------------------------------------------
# Couldn't find start - return too_long flag
#-------------------------------------------
return 1;
}
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
my $maximum_line_length =
$maximum_line_length_at_level[ $levels_to_go[$i_start] ];
# see if distance to the opening container is too great to even start
if ( $pos > $maximum_line_length ) {
#------------------------------
# too long to the opening token
#------------------------------
return 1;
}
#-----------------------------------------------------------------------
# OK so far: the statement is not to long just to the OPENING token. Now
# see if everything to the closing token will fit on one line
#-----------------------------------------------------------------------
# This is part of an update to fix cases b562 .. b983
my $K_closing = $self->[_K_closing_container_]->{$type_sequence_j};
return unless ( defined($K_closing) );
my $container_length = $rLL->[$K_closing]->[_CUMULATIVE_LENGTH_] -
$rLL->[$Kj]->[_CUMULATIVE_LENGTH_];
my $excess = $pos + 1 + $container_length - $maximum_line_length;
# Add a small tolerance for welded tokens (case b901)
if ( $total_weld_count && $self->is_welded_at_seqno($type_sequence_j) ) {
$excess += 2;
}
if ( $excess > 0 ) {
# line is too long... there is no chance of forming a one line block
# if the excess is more than 1 char
return if ( $excess > 1 );
# ... and give up if it is not a one-line block on input.
# note: for a one-line block on input, it may be possible to keep
# it as a one-line block (by removing a needless semicolon ).
my $K_start = $K_to_go[$i_start];
my $ldiff =
$rLL->[$K_closing]->[_LINE_INDEX_] - $rLL->[$K_start]->[_LINE_INDEX_];
return if ($ldiff);
}
#------------------------------------------------------------------
# Loop to check contents and length of the potential one-line block
#------------------------------------------------------------------
foreach my $Ki ( $Kj + 1 .. $K_last ) {
# old whitespace could be arbitrarily large, so don't use it
if ( $rLL->[$Ki]->[_TYPE_] eq 'b' ) { $pos += 1 }
else { $pos += $rLL->[$Ki]->[_TOKEN_LENGTH_] }
# ignore some small blocks
my $type_sequence_i = $rLL->[$Ki]->[_TYPE_SEQUENCE_];
my $nobreak = $rshort_nested->{$type_sequence_i};
# Return false result if we exceed the maximum line length,
if ( $pos > $maximum_line_length ) {
return;
}
# keep going for non-containers
elsif ( !$type_sequence_i ) {
}
# return if we encounter another opening brace before finding the
# closing brace.
elsif ($rLL->[$Ki]->[_TOKEN_] eq '{'
&& $rLL->[$Ki]->[_TYPE_] eq '{'
&& $rblock_type_of_seqno->{$type_sequence_i}
&& !$nobreak )
{
return;
}
# if we find our closing brace..
elsif ($rLL->[$Ki]->[_TOKEN_] eq '}'
&& $rLL->[$Ki]->[_TYPE_] eq '}'
&& $rblock_type_of_seqno->{$type_sequence_i}
&& !$nobreak )
{
# be sure any trailing comment also fits on the line
my $Ki_nonblank = $Ki;
if ( $Ki_nonblank < $K_last ) {
$Ki_nonblank++;
if ( $rLL->[$Ki_nonblank]->[_TYPE_] eq 'b'
&& $Ki_nonblank < $K_last )
{
$Ki_nonblank++;
}
}
# Patch for one-line sort/map/grep/eval blocks with side comments:
# We will ignore the side comment length for sort/map/grep/eval
# because this can lead to statements which change every time
# perltidy is run. Here is an example from Denis Moskowitz which
# oscillates between these two states without this patch:
## --------
## grep { $_->foo ne 'bar' } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
##
## grep {
## $_->foo ne 'bar'
## } # asdfa asdf asdf asdf asdf asdf asdf asdf asdf asdf asdf
## @baz;
## --------
# When the first line is input it gets broken apart by the main
# line break logic in sub process_line_of_CODE.
# When the second line is input it gets recombined by
# process_line_of_CODE and passed to the output routines. The
# output routines (break_long_lines) do not break it apart
# because the bond strengths are set to the highest possible value
# for grep/map/eval/sort blocks, so the first version gets output.
# It would be possible to fix this by changing bond strengths,
# but they are high to prevent errors in older versions of perl.
# See c100 for eval test.
if ( $Ki < $K_last
&& $rLL->[$K_last]->[_TYPE_] eq '#'
&& $rLL->[$K_last]->[_LEVEL_] == $rLL->[$Ki]->[_LEVEL_]
&& !$rOpts_ignore_side_comment_lengths
&& !$is_sort_map_grep_eval{$block_type}
&& $K_last - $Ki_nonblank <= 2 )
{
# Only include the side comment for if/else/elsif/unless if it
# immediately follows (because the current '$rbrace_follower'
# logic for these will give an immediate brake after these
# closing braces). So for example a line like this
# if (...) { ... } ; # very long comment......
# will already break like this:
# if (...) { ... }
# ; # very long comment......
# so we do not need to include the length of the comment, which
# would break the block. Project 'bioperl' has coding like this.
if ( !$is_if_unless_elsif_else{$block_type}
|| $K_last == $Ki_nonblank )
{
$Ki_nonblank = $K_last;
$pos += $rLL->[$Ki_nonblank]->[_TOKEN_LENGTH_];
if ( $Ki_nonblank > $Ki + 1 ) {
# source whitespace could be anything, assume
# at least one space before the hash on output
if ( $rLL->[ $Ki + 1 ]->[_TYPE_] eq 'b' ) {
$pos += 1;
}
else { $pos += $rLL->[ $Ki + 1 ]->[_TOKEN_LENGTH_] }
}
if ( $pos >= $maximum_line_length ) {
return;
}
}
}
#--------------------------
# ok, it's a one-line block
#--------------------------
create_one_line_block($i_start);
return;
}
# just keep going for other characters
else {
}
}
#--------------------------------------------------
# End Loop to examine tokens in potential one-block
#--------------------------------------------------
# We haven't hit the closing brace, but there is still space. So the
# question here is, should we keep going to look at more lines in hopes of
# forming a new one-line block, or should we stop right now. The problem
# with continuing is that we will not be able to honor breaks before the
# opening brace if we continue.
# Typically we will want to keep trying to make one-line blocks for things
# like sort/map/grep/eval. But it is not always a good idea to make as
# many one-line blocks as possible, so other types are not done. The user
# can always use -mangle.
# If we want to keep going, we will create a new one-line block.
# The blocks which we can keep going are in a hash, but we never want
# to continue if we are at a '-bli' block.
if ( $want_one_line_block{$block_type} && !$is_bli ) {
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$type_sequence_j};
my $semicolon_count = $rtype_count
&& $rtype_count->{';'} ? $rtype_count->{';'} : 0;
# Ignore a terminal semicolon in the count
if ( $semicolon_count <= 2 ) {
my $K_closing_container = $self->[_K_closing_container_];
my $K_closing_j = $K_closing_container->{$type_sequence_j};
my $Kp = $self->K_previous_nonblank($K_closing_j);
if ( defined($Kp)
&& $rLL->[$Kp]->[_TYPE_] eq ';' )
{
$semicolon_count -= 1;
}
}
if ( $semicolon_count <= 0 ) {
create_one_line_block($i_start);
}
elsif ( $semicolon_count == 1 && $block_type eq 'eval' ) {
# Mark short broken eval blocks for possible later use in
# avoiding adding spaces before a 'package' line. This is not
# essential but helps keep newer and older formatting the same.
$self->[_ris_short_broken_eval_block_]->{$type_sequence_j} = 1;
}
else {
# do not continue the search
}
}
return;
} ## end sub starting_one_line_block
sub unstore_token_to_go {
# remove most recent token from output stream
my $self = shift;
if ( $max_index_to_go > 0 ) {
$max_index_to_go--;
}
else {
$max_index_to_go = UNDEFINED_INDEX;
}
return;
} ## end sub unstore_token_to_go
sub compare_indentation_levels {
my ( $self, $K_first, $guessed_indentation_level, $line_number ) = @_;
# Check to see if output line leading space agrees with input line.
# This can be very useful for debugging a script which has an extra
# or missing brace.
# Given:
# $K_first = index of first token on the line
# $guessed_indentation_level = guess based on leading spaces in input
# $line_number = line number in input stream
return unless ( defined($K_first) );
my $rLL = $self->[_rLL_];
# ignore a line with a leading blank token - issue c195
my $type = $rLL->[$K_first]->[_TYPE_];
return if ( $type eq 'b' );
my $structural_indentation_level = $self->[_radjusted_levels_]->[$K_first];
# record max structural depth for log file
if ( $structural_indentation_level > $self->[_maximum_BLOCK_level_] ) {
$self->[_maximum_BLOCK_level_] = $structural_indentation_level;
$self->[_maximum_BLOCK_level_at_line_] = $line_number;
}
my $type_sequence = $rLL->[$K_first]->[_TYPE_SEQUENCE_];
my $is_closing_block =
$type_sequence
&& $self->[_rblock_type_of_seqno_]->{$type_sequence}
&& $type eq '}';
if ( $guessed_indentation_level ne $structural_indentation_level ) {
$self->[_last_tabbing_disagreement_] = $line_number;
if ($is_closing_block) {
if ( !$self->[_in_brace_tabbing_disagreement_] ) {
$self->[_in_brace_tabbing_disagreement_] = $line_number;
}
if ( !$self->[_first_brace_tabbing_disagreement_] ) {
$self->[_first_brace_tabbing_disagreement_] = $line_number;
}
}
if ( !$self->[_in_tabbing_disagreement_] ) {
$self->[_tabbing_disagreement_count_]++;
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"Start indentation disagreement: input=$guessed_indentation_level; output=$structural_indentation_level\n"
);
}
$self->[_in_tabbing_disagreement_] = $line_number;
$self->[_first_tabbing_disagreement_] = $line_number
unless ( $self->[_first_tabbing_disagreement_] );
}
}
else {
$self->[_in_brace_tabbing_disagreement_] = 0 if ($is_closing_block);
my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
if ($in_tabbing_disagreement) {
if ( $self->[_tabbing_disagreement_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
);
if ( $self->[_tabbing_disagreement_count_] == MAX_NAG_MESSAGES )
{
write_logfile_entry(
"No further tabbing disagreements will be noted\n");
}
}
$self->[_in_tabbing_disagreement_] = 0;
}
}
return;
} ## end sub compare_indentation_levels
###################################################
# CODE SECTION 8: Utilities for setting breakpoints
###################################################
{ ## begin closure set_forced_breakpoint
my @forced_breakpoint_undo_stack;
# These are global vars for efficiency:
# my $forced_breakpoint_count;
# my $forced_breakpoint_undo_count;
# my $index_max_forced_break;
# Break before or after certain tokens based on user settings
my %break_before_or_after_token;
BEGIN {
# Updated to use all operators. This fixes case b1054
# Here is the previous simplified version:
## my @q = qw( . : ? and or xor && || );
my @q = @all_operators;
push @q, ',';
@break_before_or_after_token{@q} = (1) x scalar(@q);
} ## end BEGIN
sub set_fake_breakpoint {
# Just bump up the breakpoint count as a signal that there are breaks.
# This is useful if we have breaks but may want to postpone deciding
# where to make them.
$forced_breakpoint_count++;
return;
} ## end sub set_fake_breakpoint
use constant DEBUG_FORCE => 0;
sub set_forced_breakpoint {
my ( $self, $i ) = @_;
# Set a breakpoint AFTER the token at index $i in the _to_go arrays.
# Exceptions:
# - If the token at index $i is a blank, backup to $i-1 to
# get to the previous nonblank token.
# - For certain tokens, the break may be placed BEFORE the token
# at index $i, depending on user break preference settings.
# - If a break is made after an opening token, then a break will
# also be made before the corresponding closing token.
# Returns: $i_nonblank
# = index of the token after which the breakpoint was actually placed
# = undef if breakpoint was not set.
my $i_nonblank;
if ( !defined($i) || $i < 0 ) {
# Calls with bad index $i are harmless but waste time and should
# be caught and eliminated during code development.
if (DEVEL_MODE) {
my ( $a, $b, $c ) = caller();
Fault(
"Bad call to forced breakpoint from $a $b $c ; called with i=$i; please fix\n"
);
}
return;
}
# Break after token $i
$i_nonblank = $self->set_forced_breakpoint_AFTER($i);
# If we break at an opening container..break at the closing
my $set_closing;
if ( defined($i_nonblank)
&& $is_opening_sequence_token{ $tokens_to_go[$i_nonblank] } )
{
$set_closing = 1;
$self->set_closing_breakpoint($i_nonblank);
}
DEBUG_FORCE && do {
my ( $pkg, $file_uu, $lno ) = caller();
my $msg =
"FORCE $forced_breakpoint_count after call from $pkg $lno with i=$i max=$max_index_to_go";
if ( !defined($i_nonblank) ) {
$i = EMPTY_STRING unless defined($i);
$msg .= " but could not set break after i='$i'\n";
}
else {
my $nobr = $nobreak_to_go[$i_nonblank];
$nobr = 0 if ( !defined($nobr) );
$msg .= <<EOM;
set break after $i_nonblank: tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobr
EOM
if ( defined($set_closing) ) {
$msg .=
" Also set closing breakpoint corresponding to this token\n";
}
}
print {*STDOUT} $msg;
};
return $i_nonblank;
} ## end sub set_forced_breakpoint
sub set_forced_breakpoint_AFTER {
my ( $self, $i ) = @_;
# Set a breakpoint AFTER the token at index $i in the _to_go arrays.
# Exceptions:
# - If the token at index $i is a blank, backup to $i-1 to
# get to the previous nonblank token.
# - For certain tokens, the break may be placed BEFORE the token
# at index $i, depending on user break preference settings.
# Returns:
# - the index of the token after which the break was set, or
# - undef if no break was set
# This routine is only called by sub set_forced_breakpoint and
# sub set_closing_breakpoint.
return if ( !defined($i) );
return if ( $i < 0 );
# Back up at a blank so we have a token to examine.
# This was added to fix for cases like b932 involving an '=' break.
if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
# Never break between welded tokens
return
if ( $total_weld_count
&& $self->[_rK_weld_right_]->{ $K_to_go[$i] } );
my $token = $tokens_to_go[$i];
my $type = $types_to_go[$i];
# patch for phantom commas, used for -qwaf
if ( !$token && $type eq ',' ) { $token = ',' }
# For certain tokens, use user settings to decide if we break before or
# after it
if ( $break_before_or_after_token{$token}
&& ( $type eq $token || $type eq 'k' ) )
{
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
# breaks are forced before 'if' and 'unless'
elsif ( $is_if_unless{$token} && $type eq 'k' ) { $i-- }
else {
# do not break before
}
if ( $i >= 0 && $i <= $max_index_to_go ) {
my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
if ( $i_nonblank >= 0
&& !$nobreak_to_go[$i_nonblank]
&& !$forced_breakpoint_to_go[$i_nonblank] )
{
$forced_breakpoint_to_go[$i_nonblank] = 1;
if ( $i_nonblank > $index_max_forced_break ) {
$index_max_forced_break = $i_nonblank;
}
$forced_breakpoint_count++;
$forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ]
= $i_nonblank;
# success
return $i_nonblank;
}
}
return;
} ## end sub set_forced_breakpoint_AFTER
sub clear_breakpoint_undo_stack {
my ($self) = @_;
$forced_breakpoint_undo_count = 0;
return;
}
use constant DEBUG_UNDOBP => 0;
sub undo_forced_breakpoint_stack {
my ( $self, $i_start ) = @_;
# Given $i_start, a non-negative index the 'undo stack' of breakpoints,
# remove all breakpoints from the top of the 'undo stack' down to and
# including index $i_start.
# The 'undo stack' is a stack of all breakpoints made for a batch of
# code.
if ( $i_start < 0 ) {
$i_start = 0;
my ( $pkg, $file_uu, $lno ) = caller();
# Bad call, can only be due to a recent programming change.
Fault(
"Program Bug: undo_forced_breakpoint_stack from $pkg $lno has bad i=$i_start "
) if (DEVEL_MODE);
return;
}
while ( $forced_breakpoint_undo_count > $i_start ) {
$forced_breakpoint_undo_count--;
my $i =
$forced_breakpoint_undo_stack[$forced_breakpoint_undo_count];
if ( $i >= 0 && $i <= $max_index_to_go ) {
$forced_breakpoint_to_go[$i] = 0;
$forced_breakpoint_count--;
DEBUG_UNDOBP && do {
my ( $pkg, $file_uu, $lno ) = caller();
print {*STDOUT}
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $pkg $lno max=$max_index_to_go\n";
};
}
# shouldn't happen, but not a critical error
else {
if (DEVEL_MODE) {
my ( $pkg, $file_uu, $lno ) = caller();
Fault(<<EOM);
Program Bug: undo_forced_breakpoint from $pkg $lno has i=$i but max=$max_index_to_go
EOM
}
}
} ## end while ( $forced_breakpoint_undo_count...)
return;
} ## end sub undo_forced_breakpoint_stack
} ## end closure set_forced_breakpoint
{ ## begin closure set_closing_breakpoint
my %postponed_breakpoint;
sub initialize_postponed_breakpoint {
%postponed_breakpoint = ();
return;
}
sub has_postponed_breakpoint {
my ($seqno) = @_;
return $postponed_breakpoint{$seqno};
}
sub set_closing_breakpoint {
my ( $self, $i_break ) = @_;
# Set a breakpoint at a matching closing token
# Given:
# $i_break = index of the opening token
if ( defined( $mate_index_to_go[$i_break] ) ) {
# Don't reduce the '2' in the statement below.
# Test files: attrib.t, BasicLyx.pm.html
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
# break before } ] and ), but sub set_forced_breakpoint will decide
# to break before or after a ? and :
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
$self->set_forced_breakpoint_AFTER(
$mate_index_to_go[$i_break] - $inc );
}
}
else {
my $type_sequence = $type_sequence_to_go[$i_break];
if ($type_sequence) {
$postponed_breakpoint{$type_sequence} = 1;
}
}
return;
} ## end sub set_closing_breakpoint
} ## end closure set_closing_breakpoint
#########################################
# CODE SECTION 9: Process batches of code
#########################################
{ ## begin closure grind_batch_of_CODE
# The routines in this closure begin the processing of a 'batch' of code.
# A variable to keep track of consecutive nonblank lines so that we can
# insert occasional blanks
my @nonblank_lines_at_depth;
# A variable to remember maximum size of previous batches; this is needed
# by the logical padding routine
my $peak_batch_size;
my $batch_count;
# variables to keep track of indentation of unmatched containers.
my %saved_opening_indentation;
sub initialize_grind_batch_of_CODE {
@nonblank_lines_at_depth = ();
$peak_batch_size = 0;
$batch_count = 0;
%saved_opening_indentation = ();
return;
} ## end sub initialize_grind_batch_of_CODE
# sub grind_batch_of_CODE receives sections of code which are the longest
# possible lines without a break. In other words, it receives what is left
# after applying all breaks forced by blank lines, block comments, side
# comments, pod text, and structural braces. Its job is to break this code
# down into smaller pieces, if necessary, which fit within the maximum
# allowed line length. Then it sends the resulting lines of code on down
# the pipeline to the VerticalAligner package, breaking the code into
# continuation lines as necessary. The batch of tokens are in the "to_go"
# arrays. The name 'grind' is slightly suggestive of a machine continually
# breaking down long lines of code, but mainly it is unique and easy to
# remember and find with an editor search.
# The two routines 'process_line_of_CODE' and 'grind_batch_of_CODE' work
# together in the following way:
# - 'process_line_of_CODE' receives the original INPUT lines one-by-one and
# combines them into the largest sequences of tokens which might form a new
# line.
# - 'grind_batch_of_CODE' determines which tokens will form the OUTPUT
# lines.
# So sub 'process_line_of_CODE' builds up the longest possible continuous
# sequences of tokens, regardless of line length, and then
# grind_batch_of_CODE breaks these sequences back down into the new output
# lines.
# Sub 'grind_batch_of_CODE' ships its output lines to the vertical aligner.
use constant DEBUG_GRIND => 0;
sub check_grind_input {
my ($self) = @_;
# Check for valid input to sub grind_batch_of_CODE. An error here
# would most likely be due to an error in 'sub store_token_to_go'.
# NOTE: This is only called when DEVEL_MODE is set.
# Be sure there are tokens in the batch
if ( $max_index_to_go < 0 ) {
Fault(<<EOM);
sub grind incorrectly called with max_index_to_go=$max_index_to_go
EOM
}
my $Klimit = $self->[_Klimit_];
# The local batch tokens must be a continuous part of the global token
# array.
my $KK;
foreach my $ii ( 0 .. $max_index_to_go ) {
my $Km = $KK;
$KK = $K_to_go[$ii];
if ( !defined($KK) || $KK < 0 || $KK > $Klimit ) {
$KK = '(undef)' unless defined($KK);
Fault(<<EOM);
at batch index at i=$ii, the value of K_to_go[$ii] = '$KK' is out of the valid range (0 - $Klimit)
EOM
}
if ( $ii > 0 && $KK != $Km + 1 ) {
my $im = $ii - 1;
Fault(<<EOM);
Non-sequential K indexes: i=$im has Km=$Km; but i=$ii has K=$KK; expecting K = Km+1
EOM
}
}
return;
} ## end sub check_grind_input
# This filter speeds up a critical if-test
my %quick_filter;
BEGIN {
my @q = qw# L { ( [ R ] ) } ? : f => #;
push @q, ',';
@quick_filter{@q} = (1) x scalar(@q);
}
sub grind_batch_of_CODE {
my ($self) = @_;
#-----------------------------------------------------------------
# This sub directs the formatting of one complete batch of tokens.
# The tokens of the batch are in the '_to_go' arrays.
#-----------------------------------------------------------------
# $this_batch = ref to array of vars for this output batch
my $this_batch = $self->[_this_batch_];
$this_batch->[_peak_batch_size_] = $peak_batch_size;
$this_batch->[_batch_count_] = ++$batch_count;
$self->check_grind_input() if (DEVEL_MODE);
# This routine is only called from sub flush_batch_of_code, so that
# routine is a better spot for debugging.
DEBUG_GRIND && do {
my $token = my $type = EMPTY_STRING;
if ( $max_index_to_go >= 0 ) {
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
}
my $output_str = EMPTY_STRING;
if ( $max_index_to_go > 20 ) {
my $mm = $max_index_to_go - 10;
$output_str =
join( EMPTY_STRING, @tokens_to_go[ 0 .. 10 ] ) . " ... "
. join( EMPTY_STRING,
@tokens_to_go[ $mm .. $max_index_to_go ] );
}
else {
$output_str = join EMPTY_STRING,
@tokens_to_go[ 0 .. $max_index_to_go ];
}
print {*STDOUT} <<EOM;
grind got batch number $batch_count with $max_index_to_go tokens, last type '$type' tok='$token', text:
$output_str
EOM
};
# Remove any trailing blank, which is possible (c192 has example)
if ( $max_index_to_go >= 0 && $types_to_go[$max_index_to_go] eq 'b' ) {
$max_index_to_go -= 1;
}
return if ( $max_index_to_go < 0 );
my $lp_object_count_this_batch;
if ($rOpts_line_up_parentheses) {
$lp_object_count_this_batch = $self->set_lp_indentation();
}
#-----------------------------
# Shortcut for block comments.
#-----------------------------
my $is_HSC;
if ( !$max_index_to_go
&& $types_to_go[0] eq '#' )
{
# But not for block comments with lp because they must use the lp
# corrector step below.
# And not for hanging side comments.
my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
$is_HSC = $batch_CODE_type && $batch_CODE_type eq 'HSC';
if ( !$is_HSC
&& !$lp_object_count_this_batch )
{
my $ibeg = 0;
$this_batch->[_ri_first_] = [$ibeg];
$this_batch->[_ri_last_] = [$ibeg];
$self->convey_batch_to_vertical_aligner();
my $level = $levels_to_go[$ibeg];
$self->[_last_line_leading_type_] = $types_to_go[$ibeg];
$self->[_last_line_leading_level_] = $level;
$nonblank_lines_at_depth[$level] = 1;
return;
}
}
#-------------
# Normal route
#-------------
my $rLL = $self->[_rLL_];
#-------------------------------------------------------
# Loop over the batch to initialize some batch variables
#-------------------------------------------------------
my $comma_count_in_batch = 0;
my @colon_list;
my @ix_seqno_controlling_ci;
my %comma_arrow_count;
my $comma_arrow_count_contained = 0;
my @unmatched_closing_indexes_in_this_batch;
my @unmatched_opening_indexes_in_this_batch;
my @i_for_semicolon;
foreach my $i ( 0 .. $max_index_to_go ) {
if ( $types_to_go[$i] eq 'b' ) {
$inext_to_go[$i] = $inext_to_go[ $i - 1 ] = $i + 1;
next;
}
$inext_to_go[$i] = $i + 1;
# This is an optional shortcut to save a bit of time by skipping
# most tokens. Note: the filter may need to be updated if the
# next 'if' tests are ever changed to include more token types.
next if ( !$quick_filter{ $types_to_go[$i] } );
my $type = $types_to_go[$i];
# gather info needed by sub break_long_lines
if ( $type_sequence_to_go[$i] ) {
# remember indexes of any tokens controlling xci
# in this batch. This list is needed by sub undo_ci.
my $seqno = $type_sequence_to_go[$i];
if ( $self->[_ris_seqno_controlling_ci_]->{$seqno} ) {
push @ix_seqno_controlling_ci, $i;
}
if ( $is_opening_sequence_token{ $tokens_to_go[$i] } ) {
if ( $self->[_rbreak_container_]->{$seqno} ) {
$self->set_forced_breakpoint($i);
}
push @unmatched_opening_indexes_in_this_batch, $i;
if ( $type eq '?' ) {
push @colon_list, $type;
}
}
else { ## $is_closing_sequence_token{$token}
if ( $i > 0 && $self->[_rbreak_container_]->{$seqno} ) {
$self->set_forced_breakpoint( $i - 1 );
}
my $i_mate = pop @unmatched_opening_indexes_in_this_batch;
if ( defined($i_mate) && $i_mate >= 0 ) {
if ( $type_sequence_to_go[$i_mate] ==
$type_sequence_to_go[$i] )
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
my $cac = $comma_arrow_count{$seqno};
$comma_arrow_count_contained += $cac if ($cac);
}
else {
push @unmatched_opening_indexes_in_this_batch,
$i_mate;
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
else {
push @unmatched_closing_indexes_in_this_batch, $i;
}
if ( $type eq ':' ) {
push @colon_list, $type;
}
}
} ## end if ($seqno)
elsif ( $type eq ',' ) { $comma_count_in_batch++; }
elsif ( $type eq '=>' ) {
if (@unmatched_opening_indexes_in_this_batch) {
my $j = $unmatched_opening_indexes_in_this_batch[-1];
my $seqno = $type_sequence_to_go[$j];
$comma_arrow_count{$seqno}++;
}
}
elsif ( $type eq 'f' ) {
push @i_for_semicolon, $i;
}
else {
## not a special type
}
} ## end for ( my $i = 0 ; $i <=...)
# Break at a single interior C-style for semicolon in this batch (c154)
if ( @i_for_semicolon && @i_for_semicolon == 1 ) {
my $i = $i_for_semicolon[0];
my $inext = $inext_to_go[$i];
if ( $inext <= $max_index_to_go && $types_to_go[$inext] ne '#' ) {
$self->set_forced_breakpoint($i);
}
}
my $is_unbalanced_batch = @unmatched_opening_indexes_in_this_batch +
@unmatched_closing_indexes_in_this_batch;
if (@unmatched_opening_indexes_in_this_batch) {
$this_batch->[_runmatched_opening_indexes_] =
\@unmatched_opening_indexes_in_this_batch;
}
if (@ix_seqno_controlling_ci) {
$this_batch->[_rix_seqno_controlling_ci_] =
\@ix_seqno_controlling_ci;
}
#------------------------
# Set special breakpoints
#------------------------
# If this line ends in a code block brace, set breaks at any
# previous closing code block braces to breakup a chain of code
# blocks on one line. This is very rare but can happen for
# user-defined subs. For example we might be looking at this:
# BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
my $saw_good_break; # flag to force breaks even if short line
if (
# looking for opening or closing block brace
$block_type_to_go[$max_index_to_go]
# never any good breaks if just one token
&& $max_index_to_go > 0
# but not one of these which are never duplicated on a line:
# until|while|for|if|elsif|else
&& !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go]
}
)
{
my $lev = $nesting_depth_to_go[$max_index_to_go];
# Walk backwards from the end and
# set break at any closing block braces at the same level.
# But quit if we are not in a chain of blocks.
foreach my $i ( reverse( 0 .. $max_index_to_go - 1 ) ) {
last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
next if ( $levels_to_go[$i] > $lev ); # skip past higher level
if ( $block_type_to_go[$i] ) {
if ( $tokens_to_go[$i] eq '}' ) {
$self->set_forced_breakpoint($i);
$saw_good_break = 1;
}
}
# quit if we see anything besides words, function, blanks
# at this level
elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
else {
## keep going
}
}
}
#-----------------------------------------------
# insertion of any blank lines before this batch
#-----------------------------------------------
my $imin = 0;
my $imax = $max_index_to_go;
# trim any blank tokens - for safety, but should not be necessary
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
if ( $imin > $imax ) {
if (DEVEL_MODE) {
my $K0 = $K_to_go[0];
my $lno = EMPTY_STRING;
if ( defined($K0) ) { $lno = $rLL->[$K0]->[_LINE_INDEX_] + 1 }
Fault(<<EOM);
Strange: received batch containing only blanks near input line $lno: after trimming imin=$imin, imax=$imax
EOM
}
return;
}
my $last_line_leading_type = $self->[_last_line_leading_type_];
my $last_line_leading_level = $self->[_last_line_leading_level_];
my $leading_type = $types_to_go[0];
my $leading_level = $levels_to_go[0];
# add blank line(s) before certain key types but not after a comment
if ( $last_line_leading_type ne '#' ) {
my $blank_count = 0;
my $leading_token = $tokens_to_go[0];
# break before certain key blocks except one-liners
if ( $leading_type eq 'k' ) {
if ( $leading_token eq 'BEGIN' || $leading_token eq 'END' ) {
$blank_count = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( 0, $max_index_to_go ) ne '}' );
}
# Break before certain block types if we haven't had a
# break at this level for a while. This is the
# difficult decision..
elsif ($last_line_leading_type ne 'b'
&& $is_if_unless_while_until_for_foreach{$leading_token} )
{
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
# patch for RT #128216: no blank line inserted at a level
# change
if ( $levels_to_go[0] != $last_line_leading_level ) {
$lc = 0;
}
if ( $rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $self->consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
&& terminal_type_i( 0, $max_index_to_go ) ne '}' )
{
$blank_count = 1;
}
}
else {
# no blank line needed
}
}
# blank lines before subs except declarations and one-liners
# Fix for c250: added new type 'P', changed 'i' to 'S'
elsif ( $leading_type eq 'S' || $leading_type eq 'P' ) {
my $special_identifier =
$self->[_ris_special_identifier_token_]->{$leading_token};
if ($special_identifier) {
## $leading_token =~ /$SUB_PATTERN/
if ( $special_identifier eq 'sub' ) {
$blank_count = $rOpts->{'blank-lines-before-subs'}
if ( terminal_type_i( 0, $max_index_to_go ) !~
/^[\;\}\,]$/ );
}
# break before all package declarations
## substr( $leading_token, 0, 8 ) eq 'package '
elsif ( $special_identifier eq 'package' ) {
# ... except in a very short eval block
my $pseqno = $parent_seqno_to_go[0];
$blank_count = $rOpts->{'blank-lines-before-packages'}
if (
!$self->[_ris_short_broken_eval_block_]->{$pseqno}
);
}
else {
DEVEL_MODE && Fault(<<EOM);
Found special identifier '$special_identifier', but expecting 'sub' or 'package'
EOM
}
}
}
# Check for blank lines wanted before a closing brace
elsif ( $leading_token eq '}' ) {
if ( $rOpts->{'blank-lines-before-closing-block'}
&& $block_type_to_go[0]
&& $block_type_to_go[0] =~
/$blank_lines_before_closing_block_pattern/ )
{
my $nblanks = $rOpts->{'blank-lines-before-closing-block'};
if ( $nblanks > $blank_count ) {
$blank_count = $nblanks;
}
}
}
else {
# no blank line needed
}
if ($blank_count) {
# future: send blank line down normal path to VerticalAligner?
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->require_blank_code_lines($blank_count);
}
}
# update blank line variables and count number of consecutive
# non-blank, non-comment lines at this level
if ( $leading_level == $last_line_leading_level
&& ( $leading_type ne '#' || $is_HSC )
&& defined( $nonblank_lines_at_depth[$leading_level] ) )
{
$nonblank_lines_at_depth[$leading_level]++;
}
else {
$nonblank_lines_at_depth[$leading_level] = 1;
}
$self->[_last_line_leading_type_] = $is_HSC ? 'q' : $leading_type;
$self->[_last_line_leading_level_] = $leading_level;
#--------------------------
# scan lists and long lines
#--------------------------
# Flag to remember if we called sub 'pad_array_to_go'.
# Some routines (break_lists(), break_long_lines() ) need some
# extra tokens added at the end of the batch. Most batches do not
# use these routines, so we will avoid calling 'pad_array_to_go'
# unless it is needed.
my $called_pad_array_to_go;
# set all forced breakpoints for good list formatting
my $is_long_line;
my $multiple_old_lines_in_batch;
if ( $max_index_to_go > 0 ) {
$is_long_line =
$self->excess_line_length( $imin, $max_index_to_go ) > 0;
my $Kbeg = $K_to_go[0];
my $Kend = $K_to_go[$max_index_to_go];
$multiple_old_lines_in_batch =
$rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_];
}
# Optional optimization: avoid calling break_lists for a single block
# brace. This is done by turning off the flag $is_unbalanced_batch.
elsif ($is_unbalanced_batch) {
my $block_type = $block_type_to_go[0];
if ( $block_type
&& !$lp_object_count_this_batch
&& $is_block_without_semicolon{$block_type} )
{
# opening blocks can skip break_lists call if no commas in
# container.
if ( $leading_type eq '{' ) {
my $seqno = $type_sequence_to_go[0];
my $rtype_count = $self->[_rtype_count_by_seqno_]->{$seqno};
if ($rtype_count) {
my $comma_count = $rtype_count->{','};
if ( !$comma_count ) {
$is_unbalanced_batch = 0;
}
}
}
# closing block braces can be skipped
else {
$is_unbalanced_batch = 0;
}
}
}
else {
# single balanced token
}
my $rbond_strength_bias = [];
if (
$is_long_line
|| $multiple_old_lines_in_batch
# must always call break_lists() with unbalanced batches because
# it is maintaining some stacks
|| $is_unbalanced_batch
# call break_lists if we might want to break at commas
|| (
$comma_count_in_batch
&& ( $rOpts_maximum_fields_per_table > 0
&& $rOpts_maximum_fields_per_table <= $comma_count_in_batch
|| $rOpts_comma_arrow_breakpoints == 0 )
)
# call break_lists if user may want to break open some one-line
# hash references
|| ( $comma_arrow_count_contained
&& $rOpts_comma_arrow_breakpoints != 3 )
)
{
# add a couple of extra terminal blank tokens
$self->pad_array_to_go();
$called_pad_array_to_go = 1;
my $sgb = $self->break_lists( $is_long_line, $rbond_strength_bias );
$saw_good_break ||= $sgb;
}
# let $ri_first and $ri_last be references to lists of
# first and last tokens of line fragments to output..
my ( $ri_first, $ri_last );
#-----------------------------
# a single token uses one line
#-----------------------------
if ( !$max_index_to_go ) {
$ri_first = [$imin];
$ri_last = [$imax];
}
# for multiple tokens
else {
#-------------------------
# write a single line if..
#-------------------------
if (
(
# this line is 'short'
!$is_long_line
# and we didn't see a good breakpoint
&& !$saw_good_break
# and we don't already have an interior breakpoint
&& !$forced_breakpoint_count
)
# or, we aren't allowed to add any newlines
|| !$rOpts_add_newlines
)
{
$ri_first = [$imin];
$ri_last = [$imax];
}
#-----------------------------
# otherwise use multiple lines
#-----------------------------
else {
# add a couple of extra terminal blank tokens if we haven't
# already done so
$self->pad_array_to_go() unless ($called_pad_array_to_go);
( $ri_first, $ri_last, my $rbond_strength_to_go ) =
$self->break_long_lines( $saw_good_break, \@colon_list,
$rbond_strength_bias );
$self->break_all_chain_tokens( $ri_first, $ri_last );
$self->break_method_call_chains( $ri_first, $ri_last )
if ( %{ $self->[_rseqno_arrow_call_chain_start_] }
&& !$pack_operator_types{'->'} );
$self->break_equals( $ri_first, $ri_last )
if @{$ri_first} >= 3;
# now we do a correction step to clean this up a bit
# (The only time we would not do this is for debugging)
$self->recombine_breakpoints( $ri_first, $ri_last,
$rbond_strength_to_go )
if ( $rOpts_recombine && @{$ri_first} > 1 );
$self->insert_final_ternary_breaks( $ri_first, $ri_last )
if (@colon_list);
}
$self->insert_breaks_before_list_opening_containers( $ri_first,
$ri_last )
if ( %break_before_container_types && $max_index_to_go > 0 );
# Check for a phantom semicolon at the end of the batch
if ( !$token_lengths_to_go[$imax] && $types_to_go[$imax] eq ';' ) {
$self->unmask_phantom_token($imax);
}
if ( $rOpts_one_line_block_semicolons == 0 ) {
$self->delete_one_line_semicolons( $ri_first, $ri_last );
}
# Remember the largest batch size processed. This is needed by the
# logical padding routine to avoid padding the first nonblank token
if ( $max_index_to_go > $peak_batch_size ) {
$peak_batch_size = $max_index_to_go;
}
}
# The batch has now been divided into lines
$this_batch->[_ri_first_] = $ri_first;
$this_batch->[_ri_last_] = $ri_last;
#-------------------
# -lp corrector step
#-------------------
if ($lp_object_count_this_batch) {
$self->correct_lp_indentation();
}
#--------------------
# ship this batch out
#--------------------
$self->convey_batch_to_vertical_aligner();
#-------------------------------------------------------------------
# Write requested number of blank lines after an opening block brace
#-------------------------------------------------------------------
if ($rOpts_blank_lines_after_opening_block) {
my $iterm = $imax;
if ( $types_to_go[$iterm] eq '#' && $iterm > $imin ) {
$iterm -= 1;
if ( $types_to_go[$iterm] eq 'b' && $iterm > $imin ) {
$iterm -= 1;
}
}
if ( $types_to_go[$iterm] eq '{'
&& $block_type_to_go[$iterm]
&& $block_type_to_go[$iterm] =~
/$blank_lines_after_opening_block_pattern/ )
{
my $nblanks = $rOpts_blank_lines_after_opening_block;
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->require_blank_code_lines($nblanks);
}
}
return;
} ## end sub grind_batch_of_CODE
sub iprev_to_go {
my ($i) = @_;
# Given index $i of a token in the '_to_go' arrays, return
# the index of the previous nonblank token.
return $i - 1 > 0
&& $types_to_go[ $i - 1 ] eq 'b' ? $i - 2 : $i - 1;
} ## end sub iprev_to_go
sub unmask_phantom_token {
my ( $self, $iend ) = @_;
# Turn a phantom token into a real token.
# Input parameter:
# $iend = the index in the output batch array of this token.
# Phantom tokens are specially marked token types (such as ';') with
# no token text which only become real tokens if they occur at the end
# of an output line. At one time phantom ',' tokens were handled
# here, but now they are processed elsewhere.
my $rLL = $self->[_rLL_];
my $KK = $K_to_go[$iend];
my $line_number = 1 + $rLL->[$KK]->[_LINE_INDEX_];
my $type = $types_to_go[$iend];
return unless ( $type eq ';' );
my $tok = $type;
my $tok_len = length($tok);
if ( $want_left_space{$type} != WS_NO ) {
$tok = SPACE . $tok;
$tok_len += 1;
}
$tokens_to_go[$iend] = $tok;
$token_lengths_to_go[$iend] = $tok_len;
$rLL->[$KK]->[_TOKEN_] = $tok;
$rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
$self->note_added_semicolon($line_number);
# This changes the summed lengths of the rest of this batch
foreach ( $iend .. $max_index_to_go ) {
$summed_lengths_to_go[ $_ + 1 ] += $tok_len;
}
return;
} ## end sub unmask_phantom_token
sub save_opening_indentation {
my ( $self, $rindentation_list ) = @_;
# Save indentations of lines of all unmatched opening tokens.
# These will be used by sub get_opening_indentation.
# This should be called after each batch of tokens is output.
# Given:
# $rindentation_list = ref to indentations for each line
# $runmatched_opening_indexes = list of indexes of unmatched tokens
my $this_batch = $self->[_this_batch_];
my $runmatched_opening_indexes =
$this_batch->[_runmatched_opening_indexes_];
$runmatched_opening_indexes = []
if ( !defined($runmatched_opening_indexes) );
# QW INDENTATION PATCH 1:
# Also save indentation for multiline qw quotes
my @i_qw;
my $seqno_qw_opening;
if ( $types_to_go[$max_index_to_go] eq 'q' ) {
my $KK = $K_to_go[$max_index_to_go];
$seqno_qw_opening =
$self->[_rstarting_multiline_qw_seqno_by_K_]->{$KK};
if ($seqno_qw_opening) {
push @i_qw, $max_index_to_go;
}
}
# we need to save indentations of any unmatched opening tokens
# in this batch because we may need them in a subsequent batch.
foreach my $i_opening ( @{$runmatched_opening_indexes}, @i_qw ) {
my $seqno = $type_sequence_to_go[$i_opening];
if ( !$seqno ) {
if ( $seqno_qw_opening && $i_opening == $max_index_to_go ) {
$seqno = $seqno_qw_opening;
}
else {
# shouldn't happen
$seqno = 'UNKNOWN';
DEVEL_MODE && Fault("unable to find sequence number\n");
}
}
$saved_opening_indentation{$seqno} = [
$self->lookup_opening_indentation(
$i_opening, $rindentation_list
)
];
}
return;
} ## end sub save_opening_indentation
sub get_saved_opening_indentation {
my ($seqno) = @_;
# Lookup indentation of an output line with a given container token
# Given:
# $seqno = sequence number of a container token
my ( $indent, $offset, $is_leading, $exists ) = ( 0, 0, 0, 0 );
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
( $indent, $offset, $is_leading ) =
@{ $saved_opening_indentation{$seqno} };
$exists = 1;
}
}
# some kind of serious error it doesn't exist
# (example is badfile.t)
return ( $indent, $offset, $is_leading, $exists );
} ## end sub get_saved_opening_indentation
} ## end closure grind_batch_of_CODE
sub lookup_opening_indentation {
my ( $self, $i_opening, $rindentation_list ) = @_;
# Get the indentation of the line in the current output batch
# which output a selected opening token
#
# Given:
# $i_opening - index of an opening token in the current output batch
# whose line indentation we need
# $rindentation_list - reference to a list containing the indentation
# used for each line. (NOTE: the first slot in
# this list is the last returned line number, and this is
# followed by the list of indentations).
#
# Return
# -the indentation of the line which contained token $i_opening
# -and its offset (number of columns) from the start of the line
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output
# line in this batch
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
if ( !@{$ri_last} ) {
# An error here implies a bug introduced by a recent program change.
# Every batch of code has lines, so this should never happen.
if (DEVEL_MODE) {
Fault("Error in opening_indentation: no lines");
}
return ( 0, 0, 0 );
}
my $nline = $rindentation_list->[0]; # line number of previous lookup
# reset line location if necessary
$nline = 0 if ( $i_opening < $ri_first->[$nline] );
# find the correct line
if ( $i_opening <= $ri_last->[-1] ) {
while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
}
# Error - token index is out of bounds - shouldn't happen
# A program bug has been introduced in one of the calling routines.
# We better stop here.
else {
my $i_last_line = $ri_last->[-1];
if (DEVEL_MODE) {
Fault(<<EOM);
Program bug in call to lookup_opening_indentation - index out of range
called with index i_opening=$i_opening > $i_last_line = max index of last line
This batch has max index = $max_index_to_go,
EOM
}
$nline = $#{$ri_last};
}
$rindentation_list->[0] =
$nline; # save line number to start looking next call
my $ibeg = $ri_first->[$nline];
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
my $is_leading = ( $ibeg == $i_opening );
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
} ## end sub lookup_opening_indentation
sub terminal_type_i {
my ( $ibeg, $iend ) = @_;
# Given:
# ($ibeg, $iend) = index range of the current output buffer line
# Returns type of last token on this line (terminal token), as follows:
# # for a full-line comment
# ' ' for a blank line
# otherwise returns final token type
# Start at the end and work backwards
my $i = $iend;
my $type_i = $types_to_go[$i];
# Check for side comment
if ( $type_i eq '#' ) {
$i--;
if ( $i < $ibeg ) {
return $type_i;
}
$type_i = $types_to_go[$i];
}
# Skip past a blank
if ( $type_i eq 'b' ) {
$i--;
if ( $i < $ibeg ) {
return $type_i;
}
$type_i = $types_to_go[$i];
}
# Found it..make sure it is a BLOCK termination,
# but hide a terminal } after sort/map/grep/eval/do because it is not
# necessarily the end of the line. (terminal.t)
my $block_type = $block_type_to_go[$i];
if (
$type_i eq '}'
&& ( !$block_type
|| $is_sort_map_grep_eval_do{$block_type} )
)
{
$type_i = 'b';
}
return $type_i;
} ## end sub terminal_type_i
sub pad_array_to_go {
my ($self) = @_;
# To simplify coding in break_lists and set_bond_strengths, it helps to
# create some extra blank tokens at the end of the arrays. We also add
# some undef's to help guard against using invalid data.
$K_to_go[ $max_index_to_go + 1 ] = undef;
$tokens_to_go[ $max_index_to_go + 1 ] = EMPTY_STRING;
$tokens_to_go[ $max_index_to_go + 2 ] = EMPTY_STRING;
$tokens_to_go[ $max_index_to_go + 3 ] = undef;
$types_to_go[ $max_index_to_go + 1 ] = 'b';
$types_to_go[ $max_index_to_go + 2 ] = 'b';
$types_to_go[ $max_index_to_go + 3 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 2 ] = undef;
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
$nesting_depth_to_go[$max_index_to_go];
# /^[R\}\)\]]$/
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
# Nesting depths are set to be >=0 in sub write_line, so it should
# not be possible to get here unless the code has a bracing error
# which leaves a closing brace with zero nesting depth.
if ( !get_saw_brace_error() ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Program bug in pad_array_to_go: hit nesting error which should have been caught
EOM
}
}
}
else {
$nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
}
}
# /^[L\{\(\[]$/
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
else {
## must be ? or :
}
return;
} ## end sub pad_array_to_go
sub break_method_call_chains {
my ( $self, $ri_left, $ri_right ) = @_;
# If there is a break at any member of a method call chain, break
# at each method call in the chain (all or none logic). See git #171.
# Given:
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
return unless ( %{ $self->[_rseqno_arrow_call_chain_start_] } );
# Look for '->' breakpoints
my @i_arrow_breaks;
my $rlist = !$want_break_before{'->'} ? $ri_right : $ri_left;
foreach my $ii ( @{$rlist} ) {
if ( $types_to_go[$ii] eq '->' ) { push @i_arrow_breaks, $ii }
}
return unless (@i_arrow_breaks);
# See if these are part of a call chain
my @insert_list;
my %is_end_i;
@is_end_i{ @{$ri_left} } = (1) x scalar( @{$ri_left} );
@is_end_i{ @{$ri_right} } = (1) x scalar( @{$ri_right} );
my $one = !$want_break_before{'->'} ? 0 : 1;
foreach my $ii (@i_arrow_breaks) {
my $ip = iprev_to_go($ii);
next if ( $ip < 0 || $tokens_to_go[$ip] ne ')' );
my $seqno = $type_sequence_to_go[$ip];
my $seqno_start = $self->[_rseqno_arrow_call_chain_start_]->{$seqno};
next unless ($seqno_start);
# Found a call chain...
my @Klist = @{ $self->[_rarrow_call_chain_]->{$seqno_start} };
my $Kref = $K_to_go[0];
foreach my $KK (@Klist) {
# Add missing breaks
my $i_K = $KK - $Kref;
next if ( $i_K <= 0 || $i_K >= $max_index_to_go );
next if ( $is_end_i{$i_K} );
if ( $K_to_go[$i_K] != $KK ) {
## shouldn't happen due to previous checks on i vs K
DEVEL_MODE && Fault(<<EOM);
unexpected array offset error i=$i_K K=$KK Kref= $Kref
EOM
next;
}
push @insert_list, $i_K - $one;
}
}
# Insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub break_method_call_chains
sub break_all_chain_tokens {
my ( $self, $ri_left, $ri_right ) = @_;
# Scan the current breakpoints looking for breaks at certain "chain
# operators" (. : && || + etc) which often occur repeatedly in a long
# statement. If we see a break at any one, break at all similar tokens
# within the same container.
# Given:
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
my %saw_chain_type;
my %left_chain_type;
my %right_chain_type;
my %interior_chain_type;
my @insert_list;
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $end_count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
$typel = '+' if ( $typel eq '-' ); # treat + and - the same
$typer = '+' if ( $typer eq '-' );
$typel = '*' if ( $typel eq '/' ); # treat * and / the same
$typer = '*' if ( $typer eq '/' );
my $keyl = $typel eq 'k' ? $tokens_to_go[$il] : $typel;
my $keyr = $typer eq 'k' ? $tokens_to_go[$ir] : $typer;
if ( $is_chain_operator{$keyl} && $want_break_before{$typel} ) {
next if ( $typel eq '?' );
push @{ $left_chain_type{$keyl} }, $il;
$saw_chain_type{$keyl} = 1;
$end_count++;
}
if ( $is_chain_operator{$keyr} && !$want_break_before{$typer} ) {
next if ( $typer eq '?' );
push @{ $right_chain_type{$keyr} }, $ir;
$saw_chain_type{$keyr} = 1;
$end_count++;
}
}
return unless $end_count;
# now look for any interior tokens of the same types
my $interior_count = 0;
my $has_interior_dot_or_plus;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
foreach my $i ( $il + 1 .. $ir - 1 ) {
my $type = $types_to_go[$i];
my $key = $type eq 'k' ? $tokens_to_go[$i] : $type;
$key = '+' if ( $key eq '-' );
$key = '*' if ( $key eq '/' );
if ( $saw_chain_type{$key} ) {
push @{ $interior_chain_type{$key} }, $i;
$interior_count++;
$has_interior_dot_or_plus ||= ( $key eq '.' || $key eq '+' );
}
}
}
return unless $interior_count;
my @keys = keys %saw_chain_type;
# quit if just ONE continuation line with leading . For example--
# print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
# . $contents;
# Fixed for b1399.
if ( $has_interior_dot_or_plus && $nmax == 1 && @keys == 1 ) {
return;
}
# now make a list of all new break points
# loop over all chain types
foreach my $key (@keys) {
next if ( $pack_operator_types{$key} );
# loop over all interior chain tokens
foreach my $itest ( @{ $interior_chain_type{$key} } ) {
# loop over all left end tokens of same type
if ( $left_chain_type{$key} ) {
next if $nobreak_to_go[ $itest - 1 ];
foreach my $i ( @{ $left_chain_type{$key} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest - 1;
# Break at matching ? if this : is at a different level.
# For example, the ? before $THRf_DEAD in the following
# should get a break if its : gets a break.
#
# my $flags =
# ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
# : ( $_ & 4 ) ? $THRf_R_DETACHED
# : $THRf_R_JOINABLE;
if ( $key eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( defined($i_question) && $i_question > 0 ) {
push @insert_list, $i_question - 1;
}
}
last;
}
}
# loop over all right end tokens of same type
if ( $right_chain_type{$key} ) {
next if $nobreak_to_go[$itest];
foreach my $i ( @{ $right_chain_type{$key} } ) {
next unless $self->in_same_container_i( $i, $itest );
push @insert_list, $itest;
# break at matching ? if this : is at a different level
if ( $key eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( defined($i_question) ) {
push @insert_list, $i_question;
}
}
last;
}
}
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub break_all_chain_tokens
sub insert_additional_breaks {
my ( $self, $ri_break_list, $ri_first, $ri_last ) = @_;
# This routine will add line breaks at requested locations after
# sub break_long_lines has made preliminary breaks.
# Given:
# $ri_break_list = list of index locations for additional breaks
# $ri_first - reference to current list of the first index $i for each
# output line in this batch
# $ri_last - reference to current list of the last index $i for each
# output line in this batch
my $i_f;
my $i_l;
my $line_number = 0;
foreach my $i_break_left ( sort { $a <=> $b } @{$ri_break_list} ) {
next if ( $nobreak_to_go[$i_break_left] );
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
while ( $i_l <= $i_break_left ) {
$line_number++;
# shouldn't happen unless caller passes bad indexes
if ( $line_number >= @{$ri_last} ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Non-fatal program bug: couldn't set break at $i_break_left
EOM
}
return;
}
$i_f = $ri_first->[$line_number];
$i_l = $ri_last->[$line_number];
} ## end while ( $i_l <= $i_break_left)
# Do not leave a blank at the end of a line; back up if necessary
if ( $types_to_go[$i_break_left] eq 'b' ) { $i_break_left-- }
my $i_break_right = $inext_to_go[$i_break_left];
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
&& $i_break_right <= $i_l )
{
splice( @{$ri_first}, $line_number, 1, ( $i_f, $i_break_right ) );
splice( @{$ri_last}, $line_number, 1, ( $i_break_left, $i_l ) );
}
}
return;
} ## end sub insert_additional_breaks
{ ## begin closure in_same_container_i
my $ris_break_token;
my $ris_comma_token;
BEGIN {
# all cases break on seeing commas at same level
my @q = qw( => );
push @q, ',';
@{$ris_comma_token}{@q} = (1) x scalar(@q);
# Non-ternary text also breaks on seeing any of qw(? : || or )
# Example: we would not want to break at any of these .'s
# : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
push @q, qw( or || ? : );
@{$ris_break_token}{@q} = (1) x scalar(@q);
} ## end BEGIN
sub in_same_container_i {
my ( $self, $i1, $i2 ) = @_;
# Check to see if tokens at $i1 and $i2 are in the same container, and
# not separated by certain characters: => , ? : || or
# This is an interface between the _to_go arrays to the rLL array
# quick check
my $parent_seqno_1 = $parent_seqno_to_go[$i1];
return if ( $parent_seqno_to_go[$i2] ne $parent_seqno_1 );
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
my $K1 = $K_to_go[$i1];
my $K2 = $K_to_go[$i2];
my $depth_1 = $nesting_depth_to_go[$i1];
return if ( $depth_1 < 0 );
# Shouldn't happen since i1 and i2 have same parent:
return unless ( $nesting_depth_to_go[$i2] == $depth_1 );
# Select character set to scan for
my $type_1 = $types_to_go[$i1];
my $rbreak = ( $type_1 ne ':' ) ? $ris_break_token : $ris_comma_token;
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
# Fast preliminary loop to verify that tokens are in the same container
my $KK = $K1;
my $Knext_last = $KK;
while ( defined( $KK = $rK_next_seqno_by_K->[$KK] ) ) {
if ( $KK <= $Knext_last ) {
## shouldn't happen: $rK_next_seqno_by_K is corrupted
DEVEL_MODE && Fault(<<EOM);
Knext should not increase: Knext_last=$Knext_last >= Knext=$KK
EOM
last;
}
$Knext_last = $KK;
last if ( $KK >= $K2 );
my $ii = $i1 + $KK - $K1;
my $depth_i = $nesting_depth_to_go[$ii];
return if ( $depth_i < $depth_1 );
next if ( $depth_i > $depth_1 );
if ( $type_1 ne ':' ) {
my $tok_i = $tokens_to_go[$ii];
return if ( $tok_i eq '?' || $tok_i eq ':' );
}
} ## end while ( defined( $KK = $rK_next_seqno_by_K...))
# Slow loop checking for certain characters
#-----------------------------------------------------
# This is potentially a slow routine and not critical.
# For safety just give up for large differences.
# See test file 'infinite_loop.txt'
#-----------------------------------------------------
return if ( $i2 - $i1 > 200 );
foreach my $ii ( $i1 + 1 .. $i2 - 1 ) {
my $depth_i = $nesting_depth_to_go[$ii];
next if ( $depth_i > $depth_1 );
return if ( $depth_i < $depth_1 );
my $typ_i = $types_to_go[$ii];
if ( $typ_i eq 'k' ) { $typ_i = $tokens_to_go[$ii] }
return if ( $rbreak->{$typ_i} );
}
return 1;
} ## end sub in_same_container_i
} ## end closure in_same_container_i
sub break_equals {
my ( $self, $ri_left, $ri_right ) = @_;
# Look for assignment operators that could use a breakpoint.
# Given:
# $ri_first - reference to current list of the first index $i for each
# output line in this batch
# $ri_last - reference to current list of the last index $i for each
# output line in this batch
# For example, in the following snippet
#
# $HOME = $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# we could break at the = to get this, which is a little nicer:
# $HOME =
# $ENV{HOME}
# || $ENV{LOGDIR}
# || $pw[7]
# || die "no home directory for user $<";
#
# The logic here follows the logic in set_logical_padding, which
# will add the padding in the second line to improve alignment.
#
my $nmax = @{$ri_right} - 1;
return if ( $nmax < 2 );
# scan the left ends of first two lines
my $tokbeg = EMPTY_STRING;
my $depth_beg;
for my $n ( 1 .. 2 ) {
my $il = $ri_left->[$n];
my $typel = $types_to_go[$il];
my $tokenl = $tokens_to_go[$il];
my $keyl = $typel eq 'k' ? $tokenl : $typel;
my $has_leading_op = $is_chain_operator{$keyl};
return unless ($has_leading_op);
if ( $n > 1 ) {
return
unless ( $tokenl eq $tokbeg
&& $nesting_depth_to_go[$il] eq $depth_beg );
}
$tokbeg = $tokenl;
$depth_beg = $nesting_depth_to_go[$il];
}
# now look for any interior tokens of the same types
my $il = $ri_left->[0];
my $ir = $ri_right->[0];
# now make a list of all new break points
my @insert_list;
foreach my $i ( reverse( $il + 1 .. $ir - 1 ) ) {
my $type = $types_to_go[$i];
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
if ( $want_break_before{$type} ) {
push @insert_list, $i - 1;
}
else {
push @insert_list, $i;
}
}
}
# Break after a 'return' followed by a chain of operators
# return ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
# To give:
# return
# ( $^O !~ /win32|dos/i )
# && ( $^O ne 'VMS' )
# && ( $^O ne 'OS2' )
# && ( $^O ne 'MacOS' );
my $i = 0;
if ( $types_to_go[$i] eq 'k'
&& $tokens_to_go[$i] eq 'return'
&& $ir > $il
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
push @insert_list, $i;
}
return unless (@insert_list);
# One final check...
# scan second and third lines and be sure there are no assignments
# we want to avoid breaking at an = to make something like this:
# unless ( $icon =
# $html_icons{"$type-$state"}
# or $icon = $html_icons{$type}
# or $icon = $html_icons{$state} )
for my $n ( 1 .. 2 ) {
my $il_n = $ri_left->[$n];
my $ir_n = $ri_right->[$n];
foreach my $ii ( $il_n + 1 .. $ir_n ) {
my $type = $types_to_go[$ii];
return
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$ii] eq $depth_beg );
}
}
# ok, insert any new break point
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub break_equals
{ ## begin closure recombine_breakpoints
# This routine is called once per batch to see if it would be better
# to combine some of the lines into which the batch has been broken.
my %is_amp_amp;
my %is_math_op;
my %is_plus_minus;
my %is_mult_div;
BEGIN {
my @q;
@q = qw( && || );
@is_amp_amp{@q} = (1) x scalar(@q);
@q = qw( + - * / );
@is_math_op{@q} = (1) x scalar(@q);
@q = qw( + - );
@is_plus_minus{@q} = (1) x scalar(@q);
@q = qw( * / );
@is_mult_div{@q} = (1) x scalar(@q);
} ## end BEGIN
sub Debug_dump_breakpoints {
my ( $self, $ri_beg, $ri_end, $msg ) = @_;
# Debug routine to dump current breakpoints...not normally called
# Given: indexes to the current lines:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
print {*STDOUT} "----Dumping breakpoints from: $msg----\n";
for my $n ( 0 .. @{$ri_end} - 1 ) {
my $ibeg = $ri_beg->[$n];
my $iend = $ri_end->[$n];
my $text = EMPTY_STRING;
foreach my $i ( $ibeg .. $iend ) {
$text .= $tokens_to_go[$i];
}
print {*STDOUT} "$n ($ibeg:$iend) $text\n";
}
print {*STDOUT} "----\n";
return;
} ## end sub Debug_dump_breakpoints
sub delete_one_line_semicolons {
my ( $self, $ri_beg, $ri_end ) = @_;
# Given: indexes to the current lines:
# $ri_beg = ref to array of beginning indexes of each line
# $ri_end = ref to array of ending indexes of each line
my $rLL = $self->[_rLL_];
my $K_opening_container = $self->[_K_opening_container_];
# Walk down the lines of this batch and delete any semicolons
# terminating one-line blocks;
my $nmax = @{$ri_end} - 1;
foreach my $n ( 0 .. $nmax ) {
my $i_beg = $ri_beg->[$n];
my $i_e = $ri_end->[$n];
my $K_beg = $K_to_go[$i_beg];
my $K_e = $K_to_go[$i_e];
my $K_end = $K_e;
my $type_end = $rLL->[$K_end]->[_TYPE_];
if ( $type_end eq '#' ) {
$K_end = $self->K_previous_nonblank($K_end);
if ( defined($K_end) ) { $type_end = $rLL->[$K_end]->[_TYPE_]; }
}
# we are looking for a line ending in closing brace
next
unless ( $type_end eq '}' && $rLL->[$K_end]->[_TOKEN_] eq '}' );
# ...and preceded by a semicolon on the same line
my $K_semicolon = $self->K_previous_nonblank($K_end);
next unless defined($K_semicolon);
my $i_semicolon = $i_beg + ( $K_semicolon - $K_beg );
next if ( $i_semicolon <= $i_beg );
next unless ( $rLL->[$K_semicolon]->[_TYPE_] eq ';' );
# Safety check - shouldn't happen - not critical
# This is not worth throwing a Fault, except in DEVEL_MODE
if ( $types_to_go[$i_semicolon] ne ';' ) {
DEVEL_MODE
&& Fault("unexpected type looking for semicolon");
next;
}
# ... with the corresponding opening brace on the same line
my $type_sequence = $rLL->[$K_end]->[_TYPE_SEQUENCE_];
my $K_opening = $K_opening_container->{$type_sequence};
next unless ( defined($K_opening) );
my $i_opening = $i_beg + ( $K_opening - $K_beg );
next if ( $i_opening < $i_beg );
# ... and only one semicolon between these braces
my $semicolon_count = 0;
foreach my $K ( $K_opening + 1 .. $K_semicolon - 1 ) {
if ( $rLL->[$K]->[_TYPE_] eq ';' ) {
$semicolon_count++;
last;
}
}
next if ($semicolon_count);
# ...ok, then make the semicolon invisible
my $len = $token_lengths_to_go[$i_semicolon];
$tokens_to_go[$i_semicolon] = EMPTY_STRING;
$token_lengths_to_go[$i_semicolon] = 0;
$rLL->[$K_semicolon]->[_TOKEN_] = EMPTY_STRING;
$rLL->[$K_semicolon]->[_TOKEN_LENGTH_] = 0;
foreach ( $i_semicolon .. $max_index_to_go ) {
$summed_lengths_to_go[ $_ + 1 ] -= $len;
}
}
return;
} ## end sub delete_one_line_semicolons
use constant DEBUG_RECOMBINE => 0;
sub recombine_breakpoints {
my ( $self, $ri_beg, $ri_end, $rbond_strength_to_go ) = @_;
# This sub implements the 'recombine' operation on a batch.
# Its task is to combine some of these lines back together to
# improve formatting. The need for this arises because
# sub 'break_long_lines' is very liberal in setting line breaks
# for long lines, always setting breaks at good breakpoints, even
# when that creates small lines. Sometimes small line fragments
# are produced which would look better if they were combined.
# Input parameters:
# $ri_beg = ref to array of BEGinning indexes of each line
# $ri_end = ref to array of ENDing indexes of each line
# $rbond_strength_to_go = array of bond strengths pulling
# tokens together, used to decide where best to recombine lines.
#-------------------------------------------------------------------
# Do nothing under extreme stress; use <= 2 for c171.
# (NOTE: New optimizations make this unnecessary. But removing this
# check is not really useful because this condition only occurs in
# test runs, and another formatting pass will fix things anyway.)
# This routine has a long history of improvements. Some past
# relevant issues are : c118, c167, c171, c186, c187, c193, c200.
#-------------------------------------------------------------------
return if ( $high_stress_level <= 2 );
my $nmax_start = @{$ri_end} - 1;
return if ( $nmax_start <= 0 );
my $iend_max = $ri_end->[$nmax_start];
if ( $types_to_go[$iend_max] eq '#' ) {
$iend_max = iprev_to_go($iend_max);
}
my $has_terminal_semicolon =
$iend_max >= 0 && $types_to_go[$iend_max] eq ';';
#--------------------------------------------------------------------
# Break into the smallest possible sub-sections to improve efficiency
#--------------------------------------------------------------------
# Also make a list of all good joining tokens between the lines
# n-1 and n.
my @joint;
my $rsections = [];
my $nbeg_sec = 0;
my $nend_sec;
my $nmax_section = 0;
foreach my $nn ( 1 .. $nmax_start ) {
my $ibeg_1 = $ri_beg->[ $nn - 1 ];
my $iend_1 = $ri_end->[ $nn - 1 ];
my $iend_2 = $ri_end->[$nn];
my $ibeg_2 = $ri_beg->[$nn];
# Define certain good joint tokens
my $itok;
foreach my $itest ( $iend_1, $ibeg_2 ) {
my $type = $types_to_go[$itest];
if ( $is_math_op{$type}
|| $is_amp_amp{$type}
|| $is_assignment{$type}
|| $type eq ':' )
{
$itok = $itest;
}
}
# joint[$nn] = index of joint character
$joint[$nn] = $itok;
# Update the section list
my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
if (
$excess <= 1
# The number 5 here is an arbitrary small number intended
# to keep most small matches in one sub-section.
|| ( defined($nend_sec)
&& ( $nn < 5 || $nmax_start - $nn < 5 ) )
)
{
$nend_sec = $nn;
}
else {
if ( defined($nend_sec) ) {
push @{$rsections}, [ $nbeg_sec, $nend_sec ];
my $num = $nend_sec - $nbeg_sec;
if ( $num > $nmax_section ) { $nmax_section = $num }
$nbeg_sec = $nn;
$nend_sec = undef;
}
$nbeg_sec = $nn;
}
}
if ( defined($nend_sec) ) {
push @{$rsections}, [ $nbeg_sec, $nend_sec ];
my $num = $nend_sec - $nbeg_sec;
if ( $num > $nmax_section ) { $nmax_section = $num }
}
my $num_sections = @{$rsections};
if ( DEBUG_RECOMBINE > 1 ) {
print {*STDOUT} <<EOM;
sections=$num_sections; nmax_sec=$nmax_section
EOM
}
if ( DEBUG_RECOMBINE > 0 ) {
my $max = 0;
print {*STDOUT}
"-----\n$num_sections sections found for nmax=$nmax_start\n";
foreach my $sect ( @{$rsections} ) {
my ( $nbeg, $nend ) = @{$sect};
my $num = $nend - $nbeg;
if ( $num > $max ) { $max = $num }
print {*STDOUT} "$nbeg $nend\n";
}
print {*STDOUT} "max size=$max of $nmax_start lines\n";
}
# Loop over all sub-sections. Note that we have to work backwards
# from the end of the batch since the sections use original line
# numbers, and the line numbers change as we go.
foreach my $section ( reverse @{$rsections} ) {
my ( $nbeg, $nend ) = @{$section};
$self->recombine_section_loop(
{
_ri_beg => $ri_beg,
_ri_end => $ri_end,
_nbeg => $nbeg,
_nend => $nend,
_rjoint => \@joint,
_rbond_strength_to_go => $rbond_strength_to_go,
_has_terminal_semicolon => $has_terminal_semicolon,
}
);
} ## end while ( my $section = pop...)
return;
} ## end sub recombine_breakpoints
sub recombine_section_loop {
my ( $self, $rhash ) = @_;
# Recombine breakpoints for one section of lines in the current batch
# Given:
# $ri_beg, $ri_end = ref to arrays with token indexes of the first
# and last line
# $nbeg, $nend = line numbers bounding this section
# $rjoint = ref to array of good joining tokens per line
# Update: $ri_beg, $ri_end, $rjoint if lines are joined
# Returns:
# nothing
#-------------
# Definitions:
#-------------
# $rhash = {
# _ri_beg = ref to array with starting token index by line
# _ri_end = ref to array with ending token index by line
# _nbeg = first line number of this section
# _nend = last line number of this section
# _rjoint = ref to array of good joining tokens for each line
# _rbond_strength_to_go = array of bond strengths
# _has_terminal_semicolon = true if last line of batch has ';'
# _num_freeze = fixed number of lines at end of this batch
# _optimization_on = true during final optimization loop
# _num_compares = total number of line compares made so far
# _pair_list = list of line pairs in optimal search order
# };
#-------------
# How it works
#-------------
# We are working with a sequence of output lines and looking at
# each pair. We must decide if it is better to join each of
# these line pairs.
# The brute force method is to loop through all line pairs and
# join the best possible pair, as determined by either some
# logical criterion or by the maximum 'bond strength' assigned
# to the joining token. Then keep doing this until there are
# no remaining line pairs to join.
# This works, but a problem is that it can theoretically take
# on the order of N^2 comparisons in some pathological cases.
# This can require an excessive amount of run time.
# We can avoid excessive run time by conceptually dividing the
# work into two phases. In the first phase we make any joints
# required by user settings or logic other than the strength of
# joints. In the second phase we make any remaining joints
# based on strengths. To do this optimally, we do a preliminary
# sort on joint strengths and always loop in that order. That
# way, we can stop a search on the first joint strength because
# it will be the maximum.
# This method is very fast, requiring no more than 3*N line
# comparisons, where N is the number of lines (see below).
my $ri_beg = $rhash->{_ri_beg};
my $ri_end = $rhash->{_ri_end};
# Line index range of this section:
my $nbeg = $rhash->{_nbeg}; # stays constant
my $nend = $rhash->{_nend}; # will decrease
# $nmax_batch = starting number of lines in the full batch
# $num_freeze = number of lines following this section to leave alone
my $nmax_batch = @{$ri_end} - 1;
$rhash->{_num_freeze} = $nmax_batch - $nend;
# Setup the list of line pairs to test. This stores the following
# values for each line pair:
# [ $n=index of the second line of the pair, $bs=bond strength]
my @pair_list;
my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
foreach my $n ( $nbeg + 1 .. $nend ) {
my $iend_1 = $ri_end->[ $n - 1 ];
my $ibeg_2 = $ri_beg->[$n];
my $bs_tweak = 0;
if ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) { $bs_tweak = 0.25 }
my $bs = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
push @pair_list, [ $n, $bs ];
}
# Any order for testing is possible, but optimization is only possible
# if we sort the line pairs on decreasing joint strength.
@pair_list =
sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @pair_list;
$rhash->{_rpair_list} = \@pair_list;
#----------------
# Iteration limit
#----------------
# This is now a very fast loop which runs in O(n) time, but a
# check on total number of iterations is retained to guard
# against future programming errors.
# Most cases require roughly 1 comparison per line pair (1 full pass).
# The upper bound is estimated to be about 3 comparisons per line pair
# unless optimization is deactivated. The approximate breakdown is:
# 1 pass with 1 compare per joint to do any special cases, plus
# 1 pass with up to 2 compares per joint in optimization mode
# The most extreme cases in my collection are:
# camel1.t - needs 2.7 compares per line (12 without optimization)
# ternary.t - needs 2.8 compares per line (12 without optimization)
# c206 - needs 3.3 compares per line, found with random testing
# So a value of MAX_COMPARE_RATIO = 4 looks like an upper bound as
# long as optimization is used. A value of 20 should allow all code to
# pass even if optimization is turned off for testing.
use constant MAX_COMPARE_RATIO => DEVEL_MODE ? 4 : 20;
my $num_pairs = $nend - $nbeg + 1;
my $max_compares = MAX_COMPARE_RATIO * $num_pairs;
# Always start with optimization off
$rhash->{_num_compares} = 0;
$rhash->{_optimization_on} = 0;
$rhash->{_ix_best_last} = 0;
#--------------------------------------------
# loop until there are no more recombinations
#--------------------------------------------
my $nmax_last = $nmax_batch + 1;
while (1) {
# Stop when the number of lines in the batch does not decrease
$nmax_batch = @{$ri_end} - 1;
if ( $nmax_batch >= $nmax_last ) {
last;
}
$nmax_last = $nmax_batch;
#-----------------------------------------
# inner loop to find next best combination
#-----------------------------------------
$self->recombine_inner_loop($rhash);
# Iteration limit check:
if ( $rhash->{_num_compares} > $max_compares ) {
# See note above; should only get here on a programming error
if (DEVEL_MODE) {
my $ibeg = $ri_beg->[$nbeg];
my $Kbeg = $K_to_go[$ibeg];
my $lno = $self->[_rLL_]->[$Kbeg]->[_LINE_INDEX_];
Fault(<<EOM);
inner loop passes =$rhash->{_num_compares} exceeds max=$max_compares, near line $lno
EOM
}
last;
}
} ## end while (1)
if (DEBUG_RECOMBINE) {
my $ratio = sprintf "%0.3f", $rhash->{_num_compares} / $num_pairs;
print {*STDOUT}
"exiting recombine_inner_loop with $nmax_last lines, opt=$rhash->{_optimization_on}, starting pairs=$num_pairs, num_compares=$rhash->{_num_compares}, ratio=$ratio\n";
}
return;
} ## end sub recombine_section_loop
sub recombine_inner_loop {
my ( $self, $rhash ) = @_;
# This is the inner loop of the recombine operation. We are working on
# a sequence of multiple lines. We look at each pair of lines and
# decide if formatting would be improved if the pair were joined
# into a single line. If there are multiple of such possible
# recombinations, we select the best. If a recombination is made,
# the number of lines in this group of lines will be reduced by one.
# See comments in the calling routine for further explanation.
# Input:
# $rhash has parameters controlling this recombine operation
# Returns: nothing
my $rK_weld_right = $self->[_rK_weld_right_];
my $rK_weld_left = $self->[_rK_weld_left_];
my $ri_beg = $rhash->{_ri_beg};
my $ri_end = $rhash->{_ri_end};
my $nbeg = $rhash->{_nbeg};
my $rjoint = $rhash->{_rjoint};
my $rbond_strength_to_go = $rhash->{_rbond_strength_to_go};
my $rpair_list = $rhash->{_rpair_list};
# This will remember the best joint:
my $n_best = 0;
my $bs_best = 0.;
my $ix_best = 0;
my $num_bs = 0;
# The index range of lines in this group is $nbeg to $nstop
my $nmax = @{$ri_end} - 1;
my $nstop = $nmax - $rhash->{_num_freeze};
my $num_joints = $nstop - $nbeg;
# Turn off optimization if just two joints remain to allow
# special two-line logic to be checked (c193)
if ( $rhash->{_optimization_on} && $num_joints <= 2 ) {
$rhash->{_optimization_on} = 0;
}
# Start where we ended the last search
my $ix_start = $rhash->{_ix_best_last};
# Keep the starting index in bounds
$ix_start = max( 0, $ix_start );
# Make a search order list which cycles around to visit
# all line pairs.
my $ix_max = @{$rpair_list} - 1;
my @ix_list = ( $ix_start .. $ix_max, 0 .. $ix_start - 1 );
my $ix_last = $ix_list[-1];
#-------------------------
# loop over all line pairs
#-------------------------
my $incomplete_loop;
foreach my $ix (@ix_list) {
my $item = $rpair_list->[$ix];
my ( $n, $bs ) = @{$item};
# This flag will be true if we 'last' out of this loop early.
# We cannot turn on optimization if this is true.
$incomplete_loop = $ix != $ix_last;
# Update the count of the number of times through this inner loop
$rhash->{_num_compares}++;
#----------------------------------------------------------
# If we join the current pair of lines,
# line $n-1 will become the left part of the joined line
# line $n will become the right part of the joined line
#
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# We want to decide if we should remove the line break
# between the tokens at $iend_1 and $ibeg_2
#
# We will apply a number of ad-hoc tests to see if joining
# here will look ok. The code will just move to the next
# pair if the join doesn't look good. If we get through
# the gauntlet of tests, the lines will be recombined.
#----------------------------------------------------------
#
# beginning and ending tokens of the lines we are working on
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
# The combined line cannot be too long
my $excess = $self->excess_line_length( $ibeg_1, $iend_2, 1 );
next if ( $excess > 0 );
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
DEBUG_RECOMBINE > 1 && do {
print {*STDOUT}
"RECOMBINE: ix=$ix iend1=$iend_1 iend2=$iend_2 n=$n nmax=$nmax if=$ibeg_1 type=$type_ibeg_1 =$tokens_to_go[$ibeg_1] next_type=$type_ibeg_2 next_tok=$tokens_to_go[$ibeg_2]\n";
};
# If line $n is the last line, we set some flags and
# do any special checks for it
my $this_line_is_semicolon_terminated;
if ( $n == $nmax ) {
if ( $type_ibeg_2 eq '{' ) {
# join isolated ')' and '{' if requested (git #110)
if ( $rOpts_cuddled_paren_brace
&& $type_iend_1 eq '}'
&& $iend_1 == $ibeg_1
&& $ibeg_2 == $iend_2 )
{
if ( $tokens_to_go[$iend_1] eq ')'
&& $tokens_to_go[$ibeg_2] eq '{' )
{
$n_best = $n;
$ix_best = $ix;
last;
}
}
# otherwise, a terminal '{' should stay where it is
# unless preceded by a fat comma
next if ( $type_iend_1 ne '=>' );
}
$this_line_is_semicolon_terminated =
$rhash->{_has_terminal_semicolon};
}
#----------------------------------------------------------
# Recombine Section 0:
# Examine the special token joining this line pair, if any.
# Put as many tests in this section to avoid duplicate code
# and to make formatting independent of whether breaks are
# to the left or right of an operator.
#----------------------------------------------------------
my $itok = $rjoint->[$n];
if ($itok) {
my $ok_0 = recombine_section_0( $itok, $ri_beg, $ri_end, $n );
next if ( !$ok_0 );
}
#----------------------------------------------------------
# Recombine Section 1:
# Join welded nested containers immediately
#----------------------------------------------------------
if (
$total_weld_count
&& ( $type_sequence_to_go[$iend_1]
&& defined( $rK_weld_right->{ $K_to_go[$iend_1] } )
|| $type_sequence_to_go[$ibeg_2]
&& defined( $rK_weld_left->{ $K_to_go[$ibeg_2] } ) )
)
{
$n_best = $n;
$ix_best = $ix;
last;
}
#----------------------------------------------------------
# Recombine Section 2:
# Examine token at $iend_1 (right end of first line of pair)
#----------------------------------------------------------
my ( $ok_2, $skip_Section_3 ) =
recombine_section_2( $ri_beg, $ri_end, $n,
$this_line_is_semicolon_terminated );
next if ( !$ok_2 );
#----------------------------------------------------------
# Recombine Section 3:
# Examine token at $ibeg_2 (left end of second line of pair)
#----------------------------------------------------------
# Join lines identified above as capable of
# causing an outdented line with leading closing paren.
# Note that we are skipping the rest of this section
# and the rest of the loop to do the join.
if ($skip_Section_3) {
$forced_breakpoint_to_go[$iend_1] = 0;
$n_best = $n;
$ix_best = $ix;
$incomplete_loop = 1;
last;
}
my ( $ok_3, $bs_tweak ) =
recombine_section_3( $ri_beg, $ri_end, $n,
$this_line_is_semicolon_terminated );
next if ( !$ok_3 );
#----------------------------------------------------------
# Recombine Section 4:
# Combine the lines if we arrive here and it is possible
#----------------------------------------------------------
# honor hard breakpoints
next if ( $forced_breakpoint_to_go[$iend_1] );
if (DEVEL_MODE) {
# This fault can only occur if an array index error has been
# introduced by a recent programming change.
my $bs_check = $rbond_strength_to_go->[$iend_1] + $bs_tweak;
if ( $bs_check != $bs ) {
Fault(<<EOM);
bs=$bs != $bs_check for break after type $type_iend_1 ix=$ix n=$n
EOM
}
}
# Require a few extra spaces before recombining lines if we
# are at an old breakpoint unless this is a simple list or
# terminal line. The goal is to avoid oscillating between
# two quasi-stable end states. For example this snippet
# caused problems:
## my $this =
## bless {
## TText => "[" . ( join ',', map { "\"$_\"" } split "\n", $_ ) . "]"
## },
## $type;
next
if ( $old_breakpoint_to_go[$iend_1]
&& !$this_line_is_semicolon_terminated
&& $n < $nmax
&& $excess + 4 > 0
&& $type_iend_2 ne ',' );
# do not recombine if we would skip in indentation levels
if ( $n < $nmax ) {
my $if_next = $ri_beg->[ $n + 1 ];
my $level_1 = $levels_to_go[$ibeg_1];
my $level_2 = $levels_to_go[$ibeg_2];
my $level_if_next = $levels_to_go[$if_next];
next
if (
$level_1 < $level_2
&& $level_2 < $level_if_next
# but an isolated 'if (' is undesirable
&& !(
$n == 1
&& $iend_1 - $ibeg_1 <= 2
&& $type_ibeg_1 eq 'k'
&& $tokens_to_go[$ibeg_1] eq 'if'
&& $tokens_to_go[$iend_1] ne '('
)
);
}
## OLD: honor no-break's
## next if ( $bs >= NO_BREAK - 1 ); # removed for b1257
# remember the pair with the greatest bond strength
if ( !$n_best ) {
# First good joint ...
$n_best = $n;
$ix_best = $ix;
$bs_best = $bs;
$num_bs = 1;
# In optimization mode: stop on the first acceptable joint
# because we already know it has the highest strength
if ( $rhash->{_optimization_on} == 1 ) {
last;
}
}
else {
# Second and later joints ..
$num_bs++;
# save maximum strength; in case of a tie select min $n
if ( $bs > $bs_best || $bs == $bs_best && $n < $n_best ) {
$n_best = $n;
$ix_best = $ix;
$bs_best = $bs;
}
}
} ## end loop over all line pairs
#---------------------------------------------------
# recombine the pair with the greatest bond strength
#---------------------------------------------------
if ($n_best) {
DEBUG_RECOMBINE > 1
&& print "BEST: nb=$n_best nbeg=$nbeg stop=$nstop bs=$bs_best\n";
splice @{$ri_beg}, $n_best, 1;
splice @{$ri_end}, $n_best - 1, 1;
splice @{$rjoint}, $n_best, 1;
splice @{$rpair_list}, $ix_best, 1;
# Update the line indexes in the pair list:
# Old $n values greater than the best $n decrease by 1
# because of the splice we just did.
foreach my $item ( @{$rpair_list} ) {
my $n_old = $item->[0];
if ( $n_old > $n_best ) { $item->[0] -= 1 }
}
# Store the index of this location for starting the next search.
# We must subtract 1 to get an updated index because the splice
# above just removed the best pair.
# BUT CAUTION: if this is the first pair in the pair list, then
# this produces an invalid index. So this index must be tested
# before use in the next pass through the outer loop.
$rhash->{_ix_best_last} = $ix_best - 1;
# Turn on optimization if ...
if (
# it is not already on, and
!$rhash->{_optimization_on}
# we have not taken a shortcut to get here, and
&& !$incomplete_loop
# we have seen a good break on strength, and
&& $num_bs
)
{
# To deactivate optimization for testing purposes, the next
# line can be commented out. This will increase run time.
$rhash->{_optimization_on} = 1;
if (DEBUG_RECOMBINE) {
my $num_compares = $rhash->{_num_compares};
my $pair_count = @ix_list;
print {*STDOUT}
"Entering optimization phase at $num_compares compares, pair count = $pair_count\n";
}
}
}
return;
} ## end sub recombine_inner_loop
sub recombine_section_0 {
my ( $itok, $ri_beg, $ri_end, $n ) = @_;
# Recombine Section 0:
# Examine special candidate joining token $itok
# Given:
# $itok = index of token at a possible join of lines $n-1 and $n
# Return:
# true => ok to combine
# false => do not combine lines
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^ ^
# | |
# ------------$itok is one of these tokens
# Put as many tests in this section to avoid duplicate code
# and to make formatting independent of whether breaks are
# to the left or right of an operator.
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
## my $ibeg_2 = $ri_beg->[$n];
my $iend_2 = $ri_end->[$n];
if ($itok) {
my $type = $types_to_go[$itok];
if ( $type eq ':' ) {
# do not join at a colon unless it disobeys the
# break request
if ( $itok eq $iend_1 ) {
return unless $want_break_before{$type};
}
else {
return if $want_break_before{$type};
}
} ## end if ':'
# handle math operators + - * /
elsif ( $is_math_op{$type} ) {
# Combine these lines if this line is a single
# number, or if it is a short term with same
# operator as the previous line. For example, in
# the following code we will combine all of the
# short terms $A, $B, $C, $D, $E, $F, together
# instead of leaving them one per line:
# my $time =
# $A * $B * $C * $D * $E * $F *
# ( 2. * $eps * $sigma * $area ) *
# ( 1. / $tcold**3 - 1. / $thot**3 );
# This can be important in math-intensive code.
my $good_combo;
my $itokp = min( $inext_to_go[$itok], $iend_2 );
my $itokpp = min( $inext_to_go[$itokp], $iend_2 );
my $itokm = max( iprev_to_go($itok), $ibeg_1 );
my $itokmm = max( iprev_to_go($itokm), $ibeg_1 );
# check for a number on the right
if ( $types_to_go[$itokp] eq 'n' ) {
# ok if nothing else on right
if ( $itokp == $iend_2 ) {
$good_combo = 1;
}
else {
# look one more token to right..
# okay if math operator or some termination
$good_combo =
( ( $itokpp == $iend_2 )
&& $is_math_op{ $types_to_go[$itokpp] } )
|| $types_to_go[$itokpp] =~ /^[#,;]$/;
}
}
# check for a number on the left
if ( !$good_combo && $types_to_go[$itokm] eq 'n' ) {
# okay if nothing else to left
if ( $itokm == $ibeg_1 ) {
$good_combo = 1;
}
# otherwise look one more token to left
else {
# okay if math operator, comma, or assignment
$good_combo = ( $itokmm == $ibeg_1 )
&& ( $is_math_op{ $types_to_go[$itokmm] }
|| $types_to_go[$itokmm] =~ /^[,]$/
|| $is_assignment{ $types_to_go[$itokmm] } );
}
}
# look for a single short token either side of the
# operator
if ( !$good_combo ) {
# Slight adjustment factor to make results
# independent of break before or after operator
# in long summed lists. (An operator and a
# space make two spaces).
my $two = ( $itok eq $iend_1 ) ? 2 : 0;
$good_combo =
# numbers or id's on both sides of this joint
$types_to_go[$itokp] =~ /^[in]$/
&& $types_to_go[$itokm] =~ /^[in]$/
# one of the two lines must be short:
&& (
(
# no more than 2 nonblank tokens right
# of joint
$itokpp == $iend_2
# short
&& token_sequence_length( $itokp, $iend_2 ) <
$two + $rOpts_short_concatenation_item_length
)
|| (
# no more than 2 nonblank tokens left of
# joint
$itokmm == $ibeg_1
# short
&& token_sequence_length( $ibeg_1, $itokm ) <
2 - $two + $rOpts_short_concatenation_item_length
)
)
# keep pure terms; don't mix +- with */
&& !(
$is_plus_minus{$type}
&& ( $is_mult_div{ $types_to_go[$itokmm] }
|| $is_mult_div{ $types_to_go[$itokpp] } )
)
&& !(
$is_mult_div{$type}
&& ( $is_plus_minus{ $types_to_go[$itokmm] }
|| $is_plus_minus{ $types_to_go[$itokpp] } )
)
;
}
# it is also good to combine if we can reduce to 2
# lines
if ( !$good_combo ) {
# index on other line where same token would be
# in a long chain.
my $iother = ( $itok == $iend_1 ) ? $iend_2 : $ibeg_1;
$good_combo =
$n == 2
&& $n == $nmax
&& $types_to_go[$iother] ne $type;
}
return unless ($good_combo);
} ## end math
elsif ( $is_amp_amp{$type} ) {
##TBD
} ## end &&, ||
elsif ( $is_assignment{$type} ) {
##TBD
}
else {
# not a special type
}
## end assignment
}
# ok to combine lines
return 1;
} ## end sub recombine_section_0
sub recombine_section_2 {
my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
# Recombine Section 2:
# We are comparing two lines to see if they should be combined
# into a single line. This sub examines the token '$iend_1' in
# the following diagram (right end of first line of pair):
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# -----Section 2 looks at this token
# Returns:
# (nothing) => do not join lines
# 1, skip_Section_3 => ok to join lines
# $skip_Section_3 is a flag for skipping the next section
my $skip_Section_3 = 0;
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
my $ibeg_nmax = $ri_beg->[$nmax];
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
# an isolated '}' may join with a ';' terminated segment
if ( $type_iend_1 eq '}' ) {
# Check for cases where combining a semicolon terminated
# statement with a previous isolated closing paren will
# allow the combined line to be outdented. This is
# generally a good move. For example, we can join up
# the last two lines here:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# )
# = stat($file);
#
# to get:
# (
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
# $size, $atime, $mtime, $ctime, $blksize, $blocks
# ) = stat($file);
#
# which makes the parens line up.
#
# Another example, from Joe Matarazzo, probably looks best
# with the 'or' clause appended to the trailing paren:
# $self->some_method(
# PARAM1 => 'foo',
# PARAM2 => 'bar'
# ) or die "Some_method didn't work";
#
# But we do not want to do this for something like the -lp
# option where the paren is not outdentable because the
# trailing clause will be far to the right.
#
# The logic here is synchronized with the logic in sub
# sub get_final_indentation, which actually does
# the outdenting.
#
my $combine_ok = $this_line_is_semicolon_terminated
# only one token on last line
&& $ibeg_1 == $iend_1
# must be structural paren
&& $tokens_to_go[$iend_1] eq ')'
# style must allow outdenting,
&& !$closing_token_indentation{')'}
# but leading colons probably line up with a
# previous colon or question (count could be wrong).
&& $type_ibeg_2 ne ':'
# only one step in depth allowed. this line must not
# begin with a ')' itself.
&& ( $nesting_depth_to_go[$iend_1] ==
$nesting_depth_to_go[$iend_2] + 1 );
# But only combine leading '&&', '||', if no previous && || :
# seen. This count includes these tokens at all levels. The
# idea is that seeing these at any level can make it hard to read
# formatting if we recombine.
if ( $is_amp_amp{$type_ibeg_2} ) {
foreach my $n_t ( reverse( 0 .. $n - 2 ) ) {
my $ibeg_t = $ri_beg->[$n_t];
my $type_t = $types_to_go[$ibeg_t];
if ( $is_amp_amp{$type_t} || $type_t eq ':' ) {
$combine_ok = 0;
last;
}
}
}
$skip_Section_3 ||= $combine_ok;
# YVES patch 2 of 2:
# Allow cuddled eval chains, like this:
# eval {
# #STUFF;
# 1; # return true
# } or do {
# #handle error
# };
# This patch works together with a patch in
# setting adjusted indentation (where the closing eval
# brace is outdented if possible).
# The problem is that an 'eval' block has continuation
# indentation and it looks better to undo it in some
# cases. If we do not use this patch we would get:
# eval {
# #STUFF;
# 1; # return true
# }
# or do {
# #handle error
# };
# The alternative, for uncuddled style, is to create
# a patch in get_final_indentation which undoes
# the indentation of a leading line like 'or do {'.
# This doesn't work well with -icb through
if (
$block_type_to_go[$iend_1]
&& $rOpts_brace_follower_vertical_tightness > 0
&& (
# -bfvt=1, allow cuddled eval chains [default]
(
$tokens_to_go[$iend_2] eq '{'
&& $block_type_to_go[$iend_1] eq 'eval'
&& !ref( $leading_spaces_to_go[$iend_1] )
&& !$rOpts_indent_closing_brace
)
# -bfvt=2, allow most brace followers [part of git #110]
|| ( $rOpts_brace_follower_vertical_tightness > 1
&& $ibeg_1 == $iend_1 )
)
&& (
( $type_ibeg_2 =~ /^(\&\&|\|\|)$/ )
|| ( $type_ibeg_2 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_2] } )
|| $is_if_unless{ $tokens_to_go[$ibeg_2] }
)
)
{
$skip_Section_3 ||= 1;
}
my $keep_going = (
$skip_Section_3
# handle '.' and '?' specially below
|| ( $type_ibeg_2 =~ /^[\.\?]$/ )
# fix for c054 (unusual -pbp case)
|| $type_ibeg_2 eq '=='
);
return unless ($keep_going);
}
elsif ( $type_iend_1 eq '{' ) {
# YVES
# honor breaks at opening brace
# Added to prevent recombining something like this:
# } || eval { package main;
return if ( $forced_breakpoint_to_go[$iend_1] );
}
# do not recombine lines with ending &&, ||,
elsif ( $is_amp_amp{$type_iend_1} ) {
return unless ( $want_break_before{$type_iend_1} );
}
# Identify and recombine a broken ?/: chain
elsif ( $type_iend_1 eq '?' ) {
# Do not recombine different levels
return
if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
# do not recombine unless next line ends in :
return unless ( $type_iend_2 eq ':' );
}
# for lines ending in a comma...
elsif ( $type_iend_1 eq ',' ) {
# Do not recombine at comma which is following the
# input bias.
# NOTE: this could be controlled by a special flag,
# but it seems to work okay.
return if ( $old_breakpoint_to_go[$iend_1] );
# An isolated '},' may join with an identifier + ';'
# This is useful for the class of a 'bless' statement
# (bless.t)
if ( $type_ibeg_1 eq '}'
&& $type_ibeg_2 eq 'i' )
{
my $combine_ok =
( ( $ibeg_1 == ( $iend_1 - 1 ) )
&& ( $iend_2 == ( $ibeg_2 + 1 ) )
&& $this_line_is_semicolon_terminated );
return if ( !$combine_ok );
# override breakpoint
$forced_breakpoint_to_go[$iend_1] = 0;
}
# but otherwise ..
else {
# do not recombine after a comma unless this will
# leave just 1 more line
return if ( $n + 1 < $nmax );
# do not recombine if there is a change in
# indentation depth
return
if ( $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
# do not recombine a "complex expression" after a
# comma. "complex" means no parens.
my $saw_paren;
foreach my $ii ( $ibeg_2 .. $iend_2 ) {
if ( $tokens_to_go[$ii] eq '(' ) {
$saw_paren = 1;
last;
}
}
return if $saw_paren;
}
}
# opening paren..
elsif ( $type_iend_1 eq '(' ) {
# No longer doing this
}
elsif ( $type_iend_1 eq ')' ) {
# No longer doing this
}
# keep a terminal for-semicolon
elsif ( $type_iend_1 eq 'f' ) {
return;
}
# if '=' at end of line ...
elsif ( $is_assignment{$type_iend_1} ) {
# keep break after = if it was in input stream
# this helps prevent 'blinkers'
return
if (
$old_breakpoint_to_go[$iend_1]
# don't strand an isolated '='
&& $iend_1 != $ibeg_1
);
my $is_short_quote =
( $type_ibeg_2 eq 'Q'
&& $ibeg_2 == $iend_2
&& token_sequence_length( $ibeg_2, $ibeg_2 ) <
$rOpts_short_concatenation_item_length );
my $is_ternary_joint = (
$type_ibeg_1 eq '?' && ( $ibeg_3 >= 0
&& $types_to_go[$ibeg_3] eq ':' )
);
# always join an isolated '=', a short quote, or if this
# will put ?/: at start of adjacent lines
if ( $ibeg_1 != $iend_1
&& !$is_short_quote
&& !$is_ternary_joint )
{
my $combine_ok = (
(
# unless we can reduce this to two lines
$nmax < $n + 2
# or three lines, the last with a leading
# semicolon
|| ( $nmax == $n + 2
&& $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or the next line ends in an open paren or
# brace and the break hasn't been forced
# [dima.t]
|| (!$forced_breakpoint_to_go[$iend_1]
&& $type_iend_2 eq '{' )
)
# do not recombine if the two lines might align
# well this is a very approximate test for this
&& (
# RT#127633 - the leading tokens are not
# operators
( $type_ibeg_2 ne $tokens_to_go[$ibeg_2] )
# or they are different
|| ( $ibeg_3 >= 0
&& $type_ibeg_2 ne $types_to_go[$ibeg_3] )
)
);
return if ( !$combine_ok );
if (
# Recombine if we can make two lines
$nmax >= $n + 2
# -lp users often prefer this:
# my $title = function($env, $env, $sysarea,
# "bubba Borrower Entry");
# so we will recombine if -lp is used we have
# ending comma
&& !(
$ibeg_3 > 0
&& ref( $leading_spaces_to_go[$ibeg_3] )
&& $type_iend_2 eq ','
)
)
{
# otherwise, scan the rhs line up to last token for
# complexity. Note that we are not counting the last token
# in case it is an opening paren.
my $ok = simple_rhs( $ri_end, $n, $nmax, $ibeg_2, $iend_2 );
return if ( !$ok );
}
}
if ( $tokens_to_go[$ibeg_2] !~ /^[\{\(\[]$/ ) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
# for keywords..
elsif ( $type_iend_1 eq 'k' ) {
# make major control keywords stand out
# (recombine.t)
return
if (
#/^(last|next|redo|return)$/
$is_last_next_redo_return{ $tokens_to_go[$iend_1] }
# but only if followed by multiple lines
&& $n < $nmax
);
if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
return
unless $want_break_before{ $tokens_to_go[$iend_1] };
}
}
elsif ( $type_iend_1 eq '.' ) {
# NOTE: the logic here should match that of section 3 so that
# line breaks are independent of choice of break before or after.
# It would be nice to combine them in section 0, but the
# special junction case ') .' makes that difficult.
# This section added to fix issues c172, c174.
my $i_next_nonblank = $ibeg_2;
my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
$summed_lengths_to_go[$ibeg_1];
my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
$summed_lengths_to_go[$ibeg_2];
my $iend_1_minus = max( $ibeg_1, iprev_to_go($iend_1) );
my $combine_ok = $pack_operator_types{'.'};
$combine_ok ||= (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
#
#
# $bodyA .=
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
#
# looks better than this:
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;' .
# '$args .= $pat;'
# check for 2 lines, not in a long broken '.' chain
( $n == 2 && $n == $nmax && $type_iend_1 ne $type_iend_2 )
# ... or this would strand a short quote , like this
# "some long quote" .
# "\n";
|| (
$types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 2
&& $token_lengths_to_go[$i_next_nonblank] <
$rOpts_short_concatenation_item_length
# additional constraints to fix c167
&& ( $types_to_go[$iend_1_minus] ne 'Q'
|| $summed_len_2 < $summed_len_1 )
)
);
return if ( !$combine_ok );
# added for issue c352
if ($this_line_is_semicolon_terminated) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
else {
# not a special type
}
return ( 1, $skip_Section_3 );
} ## end sub recombine_section_2
sub simple_rhs {
my ( $ri_end, $n, $nmax, $ibeg_2, $iend_2 ) = @_;
# Scan line ibeg_2 to $iend_2 up to last token for complexity.
# We are not counting the last token in case it is an opening paren.
# Given:
# $ri_end - ref to list of indexes of line-ending tokens
# $n = current line index
# $nmax = maximum line index
# ($ibeg_2, $iend_2) = index range of line to scan
# Return:
# true if rhs is simple, ok to recombine
# false otherwise
my $tv = 0;
my $depth = $nesting_depth_to_go[$ibeg_2];
foreach my $i ( $ibeg_2 + 1 .. $iend_2 - 1 ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
# ok to recombine if no level changes before
# last token
if ( $tv > 0 ) {
# otherwise, do not recombine if more than
# two level changes.
return if ( $tv > 1 );
# check total complexity of the two
# adjacent lines that will occur if we do
# this join
my $istop =
( $n < $nmax )
? $ri_end->[ $n + 1 ]
: $iend_2;
foreach my $i ( $iend_2 .. $istop ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 2 );
}
$depth = $nesting_depth_to_go[$i];
}
# do not recombine if total is more than 2
# level changes
return if ( $tv > 2 );
}
return 1;
} ## end sub simple_rhs
sub recombine_section_3 {
my ( $ri_beg, $ri_end, $n, $this_line_is_semicolon_terminated ) = @_;
# Recombine Section 3:
# We are comparing two lines to see if they should be combined
# into a single line. This sub examines the token '$ibeg_2' in
# the following diagram (left end of second line of pair):
# Here are Indexes of the endpoint tokens of the two lines:
#
# -----line $n-1--- | -----line $n-----
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
# ^
# |
# -----Section 3 looks at this token
# Returns:
# (nothing) => do not join lines
# 1, bs_tweak => ok to join lines
# $bstweak is a small tolerance to add to bond strengths
my $bs_tweak = 0;
my $nmax = @{$ri_end} - 1;
my $ibeg_1 = $ri_beg->[ $n - 1 ];
my $iend_1 = $ri_end->[ $n - 1 ];
my $iend_2 = $ri_end->[$n];
my $ibeg_2 = $ri_beg->[$n];
my $ibeg_0 = $n > 1 ? $ri_beg->[ $n - 2 ] : -1;
my $ibeg_3 = $n < $nmax ? $ri_beg->[ $n + 1 ] : -1;
my $ibeg_4 = $n + 2 <= $nmax ? $ri_beg->[ $n + 2 ] : -1;
my $ibeg_nmax = $ri_beg->[$nmax];
my $type_iend_1 = $types_to_go[$iend_1];
my $type_iend_2 = $types_to_go[$iend_2];
my $type_ibeg_1 = $types_to_go[$ibeg_1];
my $type_ibeg_2 = $types_to_go[$ibeg_2];
# handle lines with leading &&, ||
if ( $is_amp_amp{$type_ibeg_2} ) {
# ok to recombine if it follows a ? or :
# and is followed by an open paren..
my $ok =
( $is_ternary{$type_ibeg_1} && $tokens_to_go[$iend_2] eq '(' )
# or is followed by a ? or : at same depth
#
# We are looking for something like this. We can
# recombine the && line with the line above to make the
# structure more clear:
# return
# exists $G->{Attr}->{V}
# && exists $G->{Attr}->{V}->{$u}
# ? %{ $G->{Attr}->{V}->{$u} }
# : ();
#
# We should probably leave something like this alone:
# return
# exists $G->{Attr}->{E}
# && exists $G->{Attr}->{E}->{$u}
# && exists $G->{Attr}->{E}->{$u}->{$v}
# ? %{ $G->{Attr}->{E}->{$u}->{$v} }
# : ();
# so that we either have all of the &&'s (or ||'s)
# on one line, as in the first example, or break at
# each one as in the second example. However, it
# sometimes makes things worse to check for this because
# it prevents multiple recombinations. So this is not done.
|| ( $ibeg_3 >= 0
&& $is_ternary{ $types_to_go[$ibeg_3] }
&& $nesting_depth_to_go[$ibeg_3] ==
$nesting_depth_to_go[$ibeg_2] );
# Combine a trailing && term with an || term: fix for
# c060 This is rare but can happen.
$ok ||= 1
if ( $ibeg_3 < 0
&& $type_ibeg_2 eq '&&'
&& $type_ibeg_1 eq '||'
&& $nesting_depth_to_go[$ibeg_2] ==
$nesting_depth_to_go[$ibeg_1] );
return if !$ok && $want_break_before{$type_ibeg_2};
$forced_breakpoint_to_go[$iend_1] = 0;
# tweak the bond strength to give this joint priority
# over ? and :
$bs_tweak = 0.25;
}
# Identify and recombine a broken ?/: chain
elsif ( $type_ibeg_2 eq '?' ) {
# Do not recombine different levels
my $lev = $levels_to_go[$ibeg_2];
return if ( $lev ne $levels_to_go[$ibeg_1] );
# Do not recombine a '?' if either next line or
# previous line does not start with a ':'. The reasons
# are that (1) no alignment of the ? will be possible
# and (2) the expression is somewhat complex, so the
# '?' is harder to see in the interior of the line.
my $follows_colon = $ibeg_1 >= 0 && $type_ibeg_1 eq ':';
my $precedes_colon = $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
return unless ( $follows_colon || $precedes_colon );
# we will always combining a ? line following a : line
if ( !$follows_colon ) {
# ...otherwise recombine only if it looks like a
# chain. we will just look at a few nearby lines
# to see if this looks like a chain.
my $local_count = 0;
foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
$local_count++
if $ii >= 0
&& $types_to_go[$ii] eq ':'
&& $levels_to_go[$ii] == $lev;
}
return if ( $local_count <= 1 );
}
$forced_breakpoint_to_go[$iend_1] = 0;
}
# do not recombine lines with leading '.'
elsif ( $type_ibeg_2 eq '.' ) {
my $i_next_nonblank = min( $inext_to_go[$ibeg_2], $iend_2 );
my $summed_len_1 = $summed_lengths_to_go[ $iend_1 + 1 ] -
$summed_lengths_to_go[$ibeg_1];
my $summed_len_2 = $summed_lengths_to_go[ $iend_2 + 1 ] -
$summed_lengths_to_go[$ibeg_2];
my $combine_ok = $pack_operator_types{'.'};
$combine_ok ||= (
# ... unless there is just one and we can reduce
# this to two lines if we do. For example, this
#
#
# $bodyA .=
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
#
# looks better than this:
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
# . '$args .= $pat;'
( $n == 2 && $n == $nmax && $type_ibeg_1 ne $type_ibeg_2 )
# ... or this would strand a short quote , like this
# . "some long quote"
# . "\n";
|| (
$types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 1
&& $token_lengths_to_go[$i_next_nonblank] <
$rOpts_short_concatenation_item_length
# additional constraints to fix c167
&& (
$types_to_go[$iend_1] ne 'Q'
# allow a term shorter than the previous term
|| $summed_len_2 < $summed_len_1
# or allow a short semicolon-terminated term if this
# makes two lines (see c169)
|| ( $n == 2
&& $n == $nmax
&& $this_line_is_semicolon_terminated )
)
)
);
return if ( !$combine_ok );
# added for issue c352
if ($this_line_is_semicolon_terminated) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
# handle leading keyword..
elsif ( $type_ibeg_2 eq 'k' ) {
# handle leading "or"
if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
my $combine_ok = (
$this_line_is_semicolon_terminated
&& (
$type_ibeg_1 eq '}'
|| (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
# important: only combine a very simple
# or statement because the step below
# may have combined a trailing 'and'
# with this or, and we do not want to
# then combine everything together
&& ( $iend_2 - $ibeg_2 <= 7 )
)
)
);
return if ( !$combine_ok );
#X: RT #81854
$forced_breakpoint_to_go[$iend_1] = 0
if ( !$old_breakpoint_to_go[$iend_1] );
}
# handle leading 'and' and 'xor'
elsif ($tokens_to_go[$ibeg_2] eq 'and'
|| $tokens_to_go[$ibeg_2] eq 'xor' )
{
# Decide if we will combine a single terminal 'and'
# after an 'if' or 'unless'.
# This looks best with the 'and' on the same
# line as the 'if':
#
# $a = 1
# if $seconds and $nu < 2;
#
# But this looks better as shown:
#
# $a = 1
# if !$this->{Parents}{$_}
# or $this->{Parents}{$_} eq $_;
#
my $combine_ok = $this_line_is_semicolon_terminated
&& (
# following 'if' or 'unless' or 'or'
$type_ibeg_1 eq 'k'
&& ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
|| $tokens_to_go[$ibeg_1] eq 'or' )
);
return if ( !$combine_ok );
}
# handle leading "if" and "unless"
elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
# Combine something like:
# next
# if ( $lang !~ /${l}$/i );
# into:
# next if ( $lang !~ /${l}$/i );
my $combine_ok = $this_line_is_semicolon_terminated
# previous line begins with 'and' or 'or'
&& $type_ibeg_1 eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_1] };
return if ( !$combine_ok );
}
# handle all other leading keywords
else {
# keywords look best at start of lines,
# but combine things like "1 while"
if ( !$is_assignment{$type_iend_1} ) {
return
if ( ( $type_iend_1 ne 'k' )
&& ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
}
}
# similar treatment of && and || as above for 'and' and
# 'or': NOTE: This block of code is currently bypassed
# because of a previous block but is retained for possible
# future use.
elsif ( $is_amp_amp{$type_ibeg_2} ) {
# maybe looking at something like:
# unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
my $combine_ok = $this_line_is_semicolon_terminated
# previous line begins with an 'if' or 'unless'
# keyword
&& $type_ibeg_1 eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] };
return if ( !$combine_ok );
}
# handle line with leading = or similar
elsif ( $is_assignment{$type_ibeg_2} ) {
return unless ( $n == 1 || $n == $nmax );
return if ( $old_breakpoint_to_go[$iend_1] );
my $combine_ok = (
# if we can reduce this to two lines
$nmax == 2
# or three lines, the last with a leading semicolon
|| ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
# or the next line ends with a here doc
|| $type_iend_2 eq 'h'
# or this is a short line ending in ;
|| ( $n == $nmax
&& $this_line_is_semicolon_terminated )
);
return if ( !$combine_ok );
$forced_breakpoint_to_go[$iend_1] = 0;
}
else {
}
return ( 1, $bs_tweak );
} ## end sub recombine_section_3
} ## end closure recombine_breakpoints
sub insert_final_ternary_breaks {
my ( $self, $ri_left, $ri_right ) = @_;
# Called once per batch to look for and do any final line breaks for
# long ternary chains
# Given:
# $ri_left = ref to array with token indexes of the left line ends
# $ri_right = ref to array with token indexes of the right line ends
my $nmax = @{$ri_right} - 1;
# scan the left and right end tokens of all lines
my $i_first_colon = -1;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
return if ( $typel eq '?' );
return if ( $typer eq '?' );
if ( $typel eq ':' ) { $i_first_colon = $il; last; }
if ( $typer eq ':' ) { $i_first_colon = $ir; last; }
}
# For long ternary chains,
# if the first : we see has its ? is in the interior
# of a preceding line, then see if there are any good
# breakpoints before the ?.
if ( $i_first_colon > 0 ) {
my $i_question = $mate_index_to_go[$i_first_colon];
if ( defined($i_question) && $i_question > 0 ) {
my @insert_list;
foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
my $token = $tokens_to_go[$ii];
my $type = $types_to_go[$ii];
# For now, a good break is either a comma or,
# in a long chain, a 'return'.
# Patch for RT #126633: added the $nmax>1 check to avoid
# breaking after a return for a simple ternary. For longer
# chains the break after return allows vertical alignment, so
# it is still done. So perltidy -wba='?' will not break
# immediately after the return in the following statement:
# sub x {
# return 0 ? 'aaaaaaaaaaaaaaaaaaaaa' :
# 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb';
# }
if (
(
$type eq ','
|| $type eq 'k' && ( $nmax > 1 && $token eq 'return' )
)
&& $self->in_same_container_i( $ii, $i_question )
)
{
push @insert_list, $ii;
last;
}
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left,
$ri_right );
}
}
}
return;
} ## end sub insert_final_ternary_breaks
sub insert_breaks_before_list_opening_containers {
my ( $self, $ri_left, $ri_right ) = @_;
# This routine is called once per batch to implement the parameters
# --break-before-hash-brace, etc.
# Given:
# $ri_left = ref to array with token indexes of the left line ends
# $ri_right = ref to array with token indexes of the right line ends
# Nothing to do if none of these parameters has been set
return unless %break_before_container_types;
my $nmax = @{$ri_right} - 1;
return if ( $nmax < 0 );
my $rLL = $self->[_rLL_];
my $rbreak_before_container_by_seqno =
$self->[_rbreak_before_container_by_seqno_];
my $rK_weld_left = $self->[_rK_weld_left_];
# scan the ends of all lines
my @insert_list;
for my $n ( 0 .. $nmax ) {
my $il = $ri_left->[$n];
my $ir = $ri_right->[$n];
next if ( $ir <= $il );
my $Kl = $K_to_go[$il];
my $Kr = $K_to_go[$ir];
my $Kend = $Kr;
my $type_end = $rLL->[$Kr]->[_TYPE_];
# Backup before any side comment
if ( $type_end eq '#' ) {
$Kend = $self->K_previous_nonblank($Kr);
next unless defined($Kend);
$type_end = $rLL->[$Kend]->[_TYPE_];
}
# Backup to the start of any weld; fix for b1173.
if ($total_weld_count) {
my $Kend_test = $rK_weld_left->{$Kend};
if ( defined($Kend_test) && $Kend_test > $Kl ) {
$Kend = $Kend_test;
$Kend_test = $rK_weld_left->{$Kend};
}
# Do not break if we did not back up to the start of a weld
# (shouldn't happen)
next if ( defined($Kend_test) );
}
my $token = $rLL->[$Kend]->[_TOKEN_];
next if ( !$is_opening_token{$token} );
next if ( $Kl >= $Kend - 1 );
my $seqno = $rLL->[$Kend]->[_TYPE_SEQUENCE_];
next if ( !defined($seqno) );
# Use the flag which was previously set
next unless ( $rbreak_before_container_by_seqno->{$seqno} );
# Install a break before this opening token.
my $Kbreak = $self->K_previous_nonblank($Kend);
my $ibreak = $Kbreak - $Kl + $il;
next if ( $ibreak < $il );
next if ( $nobreak_to_go[$ibreak] );
push @insert_list, $ibreak;
}
# insert any new break points
if (@insert_list) {
$self->insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
return;
} ## end sub insert_breaks_before_list_opening_containers
sub note_added_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_added_semicolon_at_] = $line_number;
if ( $self->[_added_semicolon_count_] == 0 ) {
$self->[_first_added_semicolon_at_] = $line_number;
}
$self->[_added_semicolon_count_]++;
write_logfile_entry("Added ';' here\n");
return;
} ## end sub note_added_semicolon
sub note_deleted_semicolon {
my ( $self, $line_number ) = @_;
$self->[_last_deleted_semicolon_at_] = $line_number;
if ( $self->[_deleted_semicolon_count_] == 0 ) {
$self->[_first_deleted_semicolon_at_] = $line_number;
}
$self->[_deleted_semicolon_count_]++;
write_logfile_entry("Deleted unnecessary ';' at line $line_number\n");
return;
} ## end sub note_deleted_semicolon
sub note_embedded_tab {
my ( $self, $line_number ) = @_;
$self->[_embedded_tab_count_]++;
$self->[_last_embedded_tab_at_] = $line_number;
if ( !$self->[_first_embedded_tab_at_] ) {
$self->[_first_embedded_tab_at_] = $line_number;
}
if ( $self->[_embedded_tab_count_] <= MAX_NAG_MESSAGES ) {
write_logfile_entry("Embedded tabs in quote or pattern\n");
}
return;
} ## end sub note_embedded_tab
use constant DEBUG_CORRECT_LP => 0;
sub correct_lp_indentation {
my ($self) = @_;
# When the -lp option is used, we need to make a last pass through
# each line to correct the indentation positions in case they differ
# from the predictions. This is necessary because perltidy uses a
# predictor/corrector method for aligning with opening parens. The
# predictor is usually good, but sometimes stumbles. The corrector
# tries to patch things up once the actual opening paren locations
# are known.
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# first remove continuation indentation if appropriate
my $max_line = @{$ri_first} - 1;
#---------------------------------------------------------------------------
# PASS 1: reduce indentation if necessary at any long one-line blocks (c098)
#---------------------------------------------------------------------------
# The point is that sub 'starting_one_line_block' made one-line blocks based
# on default indentation, not -lp indentation. So some of the one-line
# blocks may be too long when given -lp indentation. We will fix that now
# if possible, using the list of these closing block indexes.
my $ri_starting_one_line_block =
$this_batch->[_ri_starting_one_line_block_];
if ( @{$ri_starting_one_line_block} ) {
$self->correct_lp_indentation_pass_1( $ri_first, $ri_last,
$ri_starting_one_line_block );
}
#-------------------------------------------------------------------
# PASS 2: look for and fix other problems in each line of this batch
#-------------------------------------------------------------------
# look at each output line ...
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
# looking at each token in this output line ...
foreach my $i ( $ibeg .. $iend ) {
# How many space characters to place before this token
# for special alignment. Actual padding is done in the
# continue block.
# looking for next unvisited indentation item ...
my $indentation = $leading_spaces_to_go[$i];
# This is just for indentation objects (c098)
next unless ( ref($indentation) );
# Visit each indentation object just once
next if ( $indentation->get_marked() );
# Mark first visit
$indentation->set_marked(1);
# Skip indentation objects which do not align with container tokens
my $align_seqno = $indentation->get_align_seqno();
next unless ($align_seqno);
# Skip a container which is entirely on this line
my $Ko = $self->[_K_opening_container_]->{$align_seqno};
my $Kc = $self->[_K_closing_container_]->{$align_seqno};
if ( defined($Ko) && defined($Kc) ) {
next if ( $Ko >= $K_to_go[$ibeg] && $Kc <= $K_to_go[$iend] );
}
# Note on flag '$do_not_pad':
# We want to avoid a situation like this, where the aligner
# inserts whitespace before the '=' to align it with a previous
# '=', because otherwise the parens might become mis-aligned in a
# situation like this, where the '=' has become aligned with the
# previous line, pushing the opening '(' forward beyond where we
# want it.
#
# $mkFloor::currentRoom = '';
# $mkFloor::c_entry = $c->Entry(
# -width => '10',
# -relief => 'sunken',
# ...
# );
#
# We leave it to the aligner to decide how to do this.
if ( $line == 1 && $i == $ibeg ) {
$this_batch->[_do_not_pad_] = 1;
}
#--------------------------------------------
# Now see what the error is and try to fix it
#--------------------------------------------
my $closing_index = $indentation->get_closed();
my $predicted_pos = $indentation->get_spaces();
# Find actual position:
my $actual_pos;
if ( $i == $ibeg ) {
# Case 1: token is first character of of batch - table lookup
if ( $line == 0 ) {
$actual_pos = $predicted_pos;
my ( $indent, $offset, $is_leading_uu, $exists_uu ) =
get_saved_opening_indentation($align_seqno);
if ( defined($indent) ) {
# NOTE: we could use '1' here if no space after
# opening and '2' if want space; it is hardwired at 1
# like -gnu-style. But it is probably best to leave
# this alone because changing it would change
# formatting of much existing code without any
# significant benefit.
$actual_pos = get_spaces($indent) + $offset + 1;
}
}
# Case 2: token starts a new line - use length of previous line
else {
my $ibegm = $ri_first->[ $line - 1 ];
my $iendm = $ri_last->[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
# follow -pt style
++$actual_pos
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
}
}
# Case 3: $i>$ibeg: token is mid-line - use length to previous token
else {
$actual_pos = total_line_length( $ibeg, $i - 1 );
# for mid-line token, we must check to see if all
# additional lines have continuation indentation,
# and remove it if so. Otherwise, we do not get
# good alignment.
if ( $closing_index > $iend ) {
my $ibeg_next = $ri_first->[ $line + 1 ];
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
$self->undo_lp_ci( $line, $i, $closing_index,
$ri_first, $ri_last );
}
}
}
# By how many spaces (plus or minus) would we need to increase the
# indentation to get alignment with the opening token?
my $move_right = $actual_pos - $predicted_pos;
if (DEBUG_CORRECT_LP) {
my $tok = substr( $tokens_to_go[$i], 0, 8 );
my $avail = $self->get_available_spaces_to_go($ibeg);
print
"CORRECT_LP for seq=$align_seqno, predicted pos=$predicted_pos actual=$actual_pos => move right=$move_right available=$avail i=$i max=$max_index_to_go tok=$tok\n";
}
# nothing more to do if no error to correct (gnu2.t)
if ( $move_right == 0 ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# Get any collapsed length defined for -xlp
my $collapsed_length =
$self->[_rcollapsed_length_by_seqno_]->{$align_seqno};
$collapsed_length = 0 unless ( defined($collapsed_length) );
if (DEBUG_CORRECT_LP) {
print
"CORRECT_LP for seq=$align_seqno, collapsed length is $collapsed_length\n";
}
# if we have not seen closure for this indentation in this batch,
# and do not have a collapsed length estimate, we can only pass on
# a request to the vertical aligner
if ( $closing_index < 0 && !$collapsed_length ) {
$indentation->set_recoverable_spaces($move_right);
next;
}
# If necessary, look ahead to see if there is really any leading
# whitespace dependent on this whitespace, and also find the
# longest line using this whitespace. Since it is always safe to
# move left if there are no dependents, we only need to do this if
# we may have dependent nodes or need to move right.
my $have_child = $indentation->get_have_child();
my %saw_indentation;
my $line_count = 1;
$saw_indentation{$indentation} = $indentation;
# How far can we move right before we hit the limit?
# let $right_margen = the number of spaces that we can increase
# the current indentation before hitting the maximum line length.
my $right_margin = 0;
if ( $have_child || $move_right > 0 ) {
$have_child = 0;
# include estimated collapsed length for incomplete containers
my $max_length = 0;
if ( $Kc > $K_to_go[$max_index_to_go] ) {
$max_length = $collapsed_length + $predicted_pos;
}
if ( $i == $ibeg ) {
my $length = total_line_length( $ibeg, $iend );
if ( $length > $max_length ) { $max_length = $length }
}
# look ahead at the rest of the lines of this batch..
foreach my $line_t ( $line + 1 .. $max_line ) {
my $ibeg_t = $ri_first->[$line_t];
my $iend_t = $ri_last->[$line_t];
last if ( $closing_index <= $ibeg_t );
# remember all different indentation objects
my $indentation_t = $leading_spaces_to_go[$ibeg_t];
$saw_indentation{$indentation_t} = $indentation_t;
$line_count++;
# remember longest line in the group
my $length_t = total_line_length( $ibeg_t, $iend_t );
if ( $length_t > $max_length ) {
$max_length = $length_t;
}
}
$right_margin =
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ] -
$max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
my $first_line_comma_count =
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
my $comma_count = $indentation->get_comma_count();
my $arrow_count = $indentation->get_arrow_count();
# This is a simple approximate test for vertical alignment:
# if we broke just after an opening paren, brace, bracket,
# and there are 2 or more commas in the first line,
# and there are no '=>'s,
# then we are probably vertically aligned. We could set
# an exact flag in sub break_lists, but this is good
# enough.
my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
( $i == $ibeg
&& $first_line_comma_count > 1
&& $indentation_count == 1
&& ( $arrow_count == 0 || $arrow_count == $line_count ) );
# Make the move if possible ..
if (
# we can always move left
$move_right < 0
# -xlp
# incomplete container
|| ( $rOpts_extended_line_up_parentheses
&& $Kc > $K_to_go[$max_index_to_go] )
|| $closing_index < 0
# but we should only move right if we are sure it will
# not spoil vertical alignment
|| ( $comma_count == 0 )
|| ( $comma_count > 0 && !$is_vertically_aligned )
)
{
my $move =
( $move_right <= $right_margin )
? $move_right
: $right_margin;
if (DEBUG_CORRECT_LP) {
print
"CORRECT_LP for seq=$align_seqno, moving $move spaces\n";
}
foreach ( keys %saw_indentation ) {
$saw_indentation{$_}
->permanently_decrease_available_spaces( -$move );
}
}
# Otherwise, record what we want and the vertical aligner
# will try to recover it.
else {
$indentation->set_recoverable_spaces($move_right);
}
} ## end loop over tokens in a line
} ## end loop over lines
return;
} ## end sub correct_lp_indentation
sub correct_lp_indentation_pass_1 {
my ( $self, $ri_first, $ri_last, $ri_starting_one_line_block ) = @_;
# So some of the one-line blocks may be too long when given -lp
# indentation. We will fix that now if possible, using the list of these
# closing block indexes.
# Given:
# $ri_first - reference to current list of the first index $i for each
# output line in this batch
# $ri_last - reference to current list of the last index $i for each
# output line in this batch
# $ri_starting_one_line_block = list of indexes starting 1-line blocks
my @ilist = @{$ri_starting_one_line_block};
return unless (@ilist);
my $max_line = @{$ri_first} - 1;
my $inext = shift @ilist;
# loop over lines, checking length of each with a one-line block
my ( $ibeg, $iend );
foreach my $line ( 0 .. $max_line ) {
$iend = $ri_last->[$line];
next if ( $inext > $iend );
$ibeg = $ri_first->[$line];
# This is just for lines with indentation objects (c098)
my $excess =
ref( $leading_spaces_to_go[$ibeg] )
? $self->excess_line_length( $ibeg, $iend )
: 0;
if ( $excess > 0 ) {
my $available_spaces = $self->get_available_spaces_to_go($ibeg);
if ( $available_spaces > 0 ) {
my $delete_want = min( $available_spaces, $excess );
my $deleted_spaces_uu =
$self->reduce_lp_indentation( $ibeg, $delete_want );
$available_spaces = $self->get_available_spaces_to_go($ibeg);
}
}
# skip forward to next one-line block to check
while (@ilist) {
$inext = shift @ilist;
next if ( $inext <= $iend );
last if ( $inext > $iend );
}
last if ( $inext <= $iend );
}
return;
} ## end sub correct_lp_indentation_pass_1
sub undo_lp_ci {
my ( $self, $line_open, $i_start, $closing_index, $ri_first, $ri_last ) =
@_;
# If there is a single, long parameter within parens, like this:
#
# $self->command( "/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?" );
#
# we can remove the continuation indentation of the 2nd and higher lines
# to achieve this effect, which is more pleasing:
#
# $self->command("/msg "
# . $infoline->chan
# . " You said $1, but did you know that it's square was "
# . $1 * $1 . " ?");
# Given:
# $line_open = index of line with opening paren
# $i_start = index of token at end of starting line ["/msg" above]
# $closing_index = index of the closing token
# $ri_first - reference to current list of the first index $i for each
# output line in this batch
# $ri_last - reference to current list of the last index $i for each
# output line in this batch
my $max_line = @{$ri_first} - 1;
# must be multiple lines
return if ( $max_line <= $line_open );
my $lev_start = $levels_to_go[$i_start];
my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
# see if all additional lines in this container have continuation
# indentation
my $line_1 = 1 + $line_open;
my $n = $line_open;
while ( ++$n <= $max_line ) {
my $ibeg = $ri_first->[$n];
my $iend = $ri_last->[$n];
if ( $ibeg eq $closing_index ) { $n--; last }
return if ( $lev_start != $levels_to_go[$ibeg] );
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
last if ( $closing_index <= $iend );
} ## end while ( ++$n <= $max_line)
# we can reduce the indentation of all continuation lines
my $continuation_line_count = $n - $line_open;
@ci_levels_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
(0) x ($continuation_line_count);
@leading_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ] =
@reduced_spaces_to_go[ @{$ri_first}[ $line_1 .. $n ] ];
return;
} ## end sub undo_lp_ci
################################################
# CODE SECTION 10: Code to break long statements
################################################
use constant DEBUG_BREAK_LINES => 0;
sub break_long_lines {
my ( $self, $saw_good_break, $rcolon_list, $rbond_strength_bias ) = @_;
#-----------------------------------------------------------
# Break a batch of tokens into lines which do not exceed the
# maximum line length.
#-----------------------------------------------------------
# Input parameters:
# $saw_good_break - a flag set by break_lists
# $rcolon_list - ref to a list of all the ? and : tokens in the batch,
# in order.
# $rbond_strength_bias - small bond strength bias values set by break_lists
# Output: returns references to the arrays:
# @i_first
# @i_last
# which contain the indexes $i of the first and last tokens on each
# line.
# In addition, the array:
# $forced_breakpoint_to_go[$i]
# may be updated to be =1 for any index $i after which there must be
# a break. This signals later routines not to undo the breakpoint.
# Method:
# This routine is called if a statement is longer than the maximum line
# length, or if a preliminary scanning located desirable break points.
# Sub break_lists has already looked at these tokens and set breakpoints
# (in array $forced_breakpoint_to_go[$i]) where it wants breaks (for
# example after commas, after opening parens, and before closing parens).
# This routine will honor these breakpoints and also add additional
# breakpoints as necessary to keep the line length below the maximum
# requested. It bases its decision on where the 'bond strength' is
# lowest.
my @i_first = (); # the first index to output
my @i_last = (); # the last index to output
my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
# Get the 'bond strengths' between tokens
my $rbond_strength_to_go = $self->set_bond_strengths();
# Add any comma bias set by break_lists
if ( @{$rbond_strength_bias} ) {
foreach my $item ( @{$rbond_strength_bias} ) {
my ( $ii, $bias ) = @{$item};
if ( $ii >= 0 && $ii <= $max_index_to_go ) {
$rbond_strength_to_go->[$ii] += $bias;
}
else {
if (DEVEL_MODE) {
my $KK = $K_to_go[0];
my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
Fault(
"Bad bond strength bias near line $lno: i=$ii must be between 0 and $max_index_to_go\n"
);
}
}
}
}
my $imin = 0;
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
my $i_begin = $imin;
my $last_break_strength = NO_BREAK;
my $i_last_break = -1;
my $line_count = 0;
# see if any ?/:'s are in order
my $colons_in_order = 1;
my $last_tok = EMPTY_STRING;
foreach ( @{$rcolon_list} ) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
}
# This is a sufficient but not necessary condition for colon chain
my $is_colon_chain = ( $colons_in_order && @{$rcolon_list} > 2 );
#------------------------------------------
# BEGINNING of main loop to set breakpoints
# Keep iterating until we reach the end
#------------------------------------------
while ( $i_begin <= $imax ) {
#------------------------------------------------------------------
# Find the best next breakpoint based on token-token bond strengths
#------------------------------------------------------------------
my ( $i_lowest, $lowest_strength, $Msg ) =
$self->break_lines_inner_loop(
$i_begin,
$i_last_break,
$imax,
$last_break_strength,
$line_count,
$rbond_strength_to_go,
$saw_good_break,
);
# Now make any adjustments required by ternary breakpoint rules
if ( @{$rcolon_list} ) {
my $i_next_nonblank = $inext_to_go[$i_lowest];
#-------------------------------------------------------
# ?/: rule 1 : if a break here will separate a '?' on this
# line from its closing ':', then break at the '?' instead.
# But do not break a sequential chain of ?/: statements
#-------------------------------------------------------
if ( !$is_colon_chain ) {
foreach my $i ( $i_begin + 1 .. $i_lowest - 1 ) {
next unless ( $tokens_to_go[$i] eq '?' );
# do not break if statement is broken by side comment
next
if ( $tokens_to_go[$max_index_to_go] eq '#'
&& terminal_type_i( 0, $max_index_to_go ) !~
/^[\;\}]$/ );
# no break needed if matching : is also on the line
next
if ( defined( $mate_index_to_go[$i] )
&& $mate_index_to_go[$i] <= $i_next_nonblank );
$i_lowest = $i;
if ( $want_break_before{'?'} ) { $i_lowest-- }
$i_next_nonblank = $inext_to_go[$i_lowest];
last;
}
}
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
#-------------------------------------------------------------
# ?/: rule 2 : if we break at a '?', then break at its ':'
#
# Note: this rule is also in sub break_lists to handle a break
# at the start and end of a line (in case breaks are dictated
# by side comments).
#-------------------------------------------------------------
if ( $next_nonblank_type eq '?' ) {
$self->set_closing_breakpoint($i_next_nonblank);
}
elsif ( $types_to_go[$i_lowest] eq '?' ) {
$self->set_closing_breakpoint($i_lowest);
}
else {
# not at a '?'
}
#--------------------------------------------------------
# ?/: rule 3 : if we break at a ':' then we save
# its location for further work below. We may need to go
# back and break at its '?'.
#--------------------------------------------------------
if ( $next_nonblank_type eq ':' ) {
push @i_colon_breaks, $i_next_nonblank;
}
elsif ( $types_to_go[$i_lowest] eq ':' ) {
push @i_colon_breaks, $i_lowest;
}
else {
# not at a ':'
}
# here we should set breaks for all '?'/':' pairs which are
# separated by this line
}
# Fix two-line shear (c406)
my $i_next_nonblank = $inext_to_go[$i_lowest];
if ( $tokens_to_go[$i_next_nonblank] eq ')' ) {
# Example of a '2 line shear':
# $wrapped->add_around_modifier(
# sub { push @tracelog => 'around 1'; $_[0]->(); } );
# If we try formatting this with increasing line lengths, the
# break based on bond strengths is after the '(' until the closing
# paren is just beyond the line length limit. In that case, it can
# switch to being just before the ')'. This is rare, and may be
# undesirable because it can cause unexpected formatting
# variations between similar code, and worse, instability with
# trailing commas. So we check for that here and put the break
# back after the opening '(' if the ')' is not preceded by a ','.
# Issue c406.
my $i_prev = iprev_to_go($i_next_nonblank);
my $i_opening = $mate_index_to_go[$i_next_nonblank];
if ( $types_to_go[$i_prev] ne ','
&& defined($i_opening)
&& $i_opening > $i_last_break )
{
# set a forced breakpoint to block recombination
$i_lowest = $i_opening;
$forced_breakpoint_to_go[$i_lowest] = 1;
}
}
#--------------------------------------------------
# guard against infinite loop (should never happen)
#--------------------------------------------------
if ( $i_lowest <= $i_last_break ) {
DEVEL_MODE
&& Fault("i_lowest=$i_lowest <= i_last_break=$i_last_break\n");
$i_lowest = $imax;
}
DEBUG_BREAK_LINES
&& print {*STDOUT}
"BREAK: best is i = $i_lowest strength = $lowest_strength;\nReason>> $Msg\n";
$line_count++;
# save this line segment, after trimming blanks at the ends
push( @i_first,
( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
push( @i_last,
( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
# set a forced breakpoint at a container opening, if necessary, to
# signal a break at a closing container. Excepting '(' for now.
if (
(
$tokens_to_go[$i_lowest] eq '{'
|| $tokens_to_go[$i_lowest] eq '['
)
&& !$forced_breakpoint_to_go[$i_lowest]
)
{
$self->set_closing_breakpoint($i_lowest);
}
# get ready to find the next breakpoint
$last_break_strength = $lowest_strength;
$i_last_break = $i_lowest;
$i_begin = $i_lowest + 1;
# skip past a blank
if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
$i_begin++;
}
} ## end while ( $i_begin <= $imax)
#-------------------------------------------------
# END of main loop to set continuation breakpoints
#-------------------------------------------------
#-----------------------------------------------------------
# ?/: rule 4 -- if we broke at a ':', then break at
# corresponding '?' unless this is a chain of ?: expressions
#-----------------------------------------------------------
if (@i_colon_breaks) {
my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
if ( !$is_chain ) {
$self->do_colon_breaks( \@i_colon_breaks, \@i_first, \@i_last );
}
}
return ( \@i_first, \@i_last, $rbond_strength_to_go );
} ## end sub break_long_lines
# small bond strength numbers to help break ties
use constant TINY_BIAS => 0.0001;
use constant MAX_BIAS => 0.001;
my %is_dot_and_or;
BEGIN {
my @q = qw( . && || );
@is_dot_and_or{@q} = (1) x scalar(@q);
}
sub break_lines_inner_loop {
# Find the best next breakpoint in index range ($i_begin .. $imax)
# which, if possible, does not exceed the maximum line length.
my (
$self,
$i_begin,
$i_last_break,
$imax,
$last_break_strength,
$line_count,
$rbond_strength_to_go,
$saw_good_break,
) = @_;
# Given:
# $i_begin = first index of range
# $i_last_break = index of previous break
# $imax = last index of range
# $last_break_strength = bond strength of last break
# $line_count = number of output lines so far
# $rbond_strength_to_go = ref to array of bond strengths
# $saw_good_break = true if old line had a good breakpoint
# Returns:
# $i_lowest = index of best breakpoint
# $lowest_strength = 'bond strength' at best breakpoint
# $Msg = string of debug info
my $Msg = EMPTY_STRING;
my $strength = NO_BREAK;
my $i_test = $i_begin - 1;
my $i_lowest = -1;
my $starting_sum = $summed_lengths_to_go[$i_begin];
my $lowest_strength = NO_BREAK;
my $leading_alignment_type = EMPTY_STRING;
my $leading_spaces = leading_spaces_to_go($i_begin);
my $maximum_line_length =
$maximum_line_length_at_level[ $levels_to_go[$i_begin] ];
DEBUG_BREAK_LINES
&& do {
$Msg .= "updating leading spaces to be $leading_spaces at i=$i_begin\n";
};
# Do not separate an isolated bare word from an opening paren.
# Alternate Fix #2 for issue b1299. This waits as long as possible
# to make the decision.
# Note for fix #c250: to keep line breaks unchanged under -extrude when
# switching from 'i' to 'S' for subs, we would have to also check 'S', i.e.
# =~/^[Si]$/. But this was never necessary at a sub signature, so we leave
# it alone and allow the new version to be different for --extrude. For a
# test file run perl527/signatures.t with --extrude.
if ( $types_to_go[$i_begin] eq 'i'
&& substr( $tokens_to_go[$i_begin], 0, 1 ) =~ /\w/ )
{
my $i_next_nonblank = $inext_to_go[$i_begin];
if ( $tokens_to_go[$i_next_nonblank] eq '(' ) {
$rbond_strength_to_go->[$i_begin] = NO_BREAK;
}
}
# Avoid a break which would strand a single punctuation
# token. For example, we do not want to strand a leading
# '.' which is followed by a long quoted string.
# But note that we do want to do this with -extrude (l=1)
# so please test any changes to this code on -extrude.
if (
( $i_begin < $imax )
&& ( $tokens_to_go[$i_begin] eq $types_to_go[$i_begin] )
&& !$forced_breakpoint_to_go[$i_begin]
&& !(
# Allow break after a closing eval brace. This is an
# approximate way to simulate a forced breakpoint made in
# Section B below. No differences have been found, but if
# necessary the full logic of Section B could be used here
# (see c165).
$tokens_to_go[$i_begin] eq '}'
&& $block_type_to_go[$i_begin]
&& $block_type_to_go[$i_begin] eq 'eval'
)
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_begin + 1 ] -
$starting_sum
) < $maximum_line_length
)
)
{
$i_test = min( $imax, $inext_to_go[$i_begin] ) - 1;
DEBUG_BREAK_LINES && do {
$Msg .= " :skip ahead at i=$i_test";
};
}
#-------------------------------------------------------
# Begin INNER_LOOP over the indexes in the _to_go arrays
#-------------------------------------------------------
while ( ++$i_test <= $imax ) {
my $type = $types_to_go[$i_test];
my $token = $tokens_to_go[$i_test];
my $i_next_nonblank = $inext_to_go[$i_test];
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
#---------------------------------------------------------------
# Section A: Get token-token strength and handle any adjustments
#---------------------------------------------------------------
# adjustments to the previous bond strength may have been made, and
# we must keep the bond strength of a token and its following blank
# the same;
my $last_strength = $strength;
$strength = $rbond_strength_to_go->[$i_test];
if ( $type eq 'b' ) { $strength = $last_strength }
# reduce strength a bit to break ties at an old comma breakpoint ...
if (
$old_breakpoint_to_go[$i_test]
# Patch: limited to just commas to avoid blinking states
&& $type eq ','
# which is a 'good' breakpoint, meaning ...
# we don't want to break before it
&& !$want_break_before{$type}
# and either we want to break before the next token
# or the next token is not short (i.e. not a '*', '/' etc.)
&& $i_next_nonblank <= $imax
&& ( $want_break_before{$next_nonblank_type}
|| $token_lengths_to_go[$i_next_nonblank] > 2
|| $next_nonblank_type eq ','
|| $is_opening_type{$next_nonblank_type} )
)
{
$strength -= TINY_BIAS;
DEBUG_BREAK_LINES && do { $Msg .= " :-bias at i=$i_test" };
}
# otherwise increase strength a bit if this token would be at the
# maximum line length. This is necessary to avoid blinking
# in the above example when the -iob flag is added.
else {
my $len =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 1 ] -
$starting_sum;
if ( $len >= $maximum_line_length ) {
$strength += TINY_BIAS;
DEBUG_BREAK_LINES && do { $Msg .= " :+bias at i=$i_test" };
}
}
#-------------------------------------
# Section B: Handle forced breakpoints
#-------------------------------------
my $must_break;
# Force an immediate break at certain operators
# with lower level than the start of the line,
# unless we've already seen a better break.
#
# Note on an issue with a preceding '?' :
# There may be a break at a previous ? if the line is long. Because
# of this we do not want to force a break if there is a previous ? on
# this line. For now the best way to do this is to not break if we
# have seen a lower strength point, which is probably a ?.
#
# Example of unwanted breaks we are avoiding at a '.' following a ?
# from pod2html using perltidy -gnu:
# )
# ? "\n<A NAME=\""
# . $value
# . "\">\n$text</A>\n"
# : "\n$type$pod2.html\#" . $value . "\">$text<\/A>\n";
if (
( $strength <= $lowest_strength )
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
&& (
## /^(\.|\&\&|\|\|)$/
$is_dot_and_or{$next_nonblank_type}
|| (
$next_nonblank_type eq 'k'
## /^(and|or)$/ # note: includes 'xor' now
&& $is_and_or{$next_nonblank_token}
)
)
)
{
$self->set_forced_breakpoint($i_next_nonblank);
DEBUG_BREAK_LINES
&& do { $Msg .= " :Forced break at i=$i_next_nonblank" };
}
if (
# Try to put a break where requested by break_lists
$forced_breakpoint_to_go[$i_test]
# break between ) { in a continued line so that the '{' can
# be outdented
# See similar logic in break_lists which catches instances
# where a line is just something like ') {'. We have to
# be careful because the corresponding block keyword might
# not be on the first line, such as 'for' here:
#
# eval {
# for ("a") {
# for $x ( 1, 2 ) { local $_ = "b"; s/(.*)/+$1/ }
# }
# };
#
|| (
$line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& ( $next_nonblank_block_type ne $tokens_to_go[$i_begin] )
# RT #104427: Dont break before opening sub brace because
# sub block breaks handled at higher level, unless
# it looks like the preceding list is long and broken
&& !(
(
$next_nonblank_block_type =~ /$SUB_PATTERN/
|| $matches_ASUB{$next_nonblank_block_type}
)
&& ( $nesting_depth_to_go[$i_begin] ==
$nesting_depth_to_go[$i_next_nonblank] )
)
&& !$rOpts_opening_brace_always_on_right
)
# There is an implied forced break at a terminal opening brace
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
)
{
# Forced breakpoints must sometimes be overridden, for example
# because of a side comment causing a NO_BREAK. It is easier
# to catch this here than when they are set.
if ( $strength < NO_BREAK - 1 ) {
$strength = $lowest_strength - TINY_BIAS;
$must_break = 1;
DEBUG_BREAK_LINES
&& do { $Msg .= " :set must_break at i=$i_next_nonblank" };
}
}
# quit if a break here would put a good terminal token on
# the next line and we already have a possible break
if (
( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
&& !$must_break
&& (
(
$leading_spaces +
$summed_lengths_to_go[ $i_next_nonblank + 1 ] -
$starting_sum
) > $maximum_line_length
)
)
{
if ( $i_lowest >= 0 ) {
DEBUG_BREAK_LINES && do {
$Msg .= " :quit at good terminal='$next_nonblank_type'";
};
last;
}
}
#------------------------------------------------------------
# Section C: Look for the lowest bond strength between tokens
#------------------------------------------------------------
if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) ) {
# break at previous best break if it would have produced
# a leading alignment of certain common tokens, and it
# is different from the latest candidate break
if ($leading_alignment_type) {
DEBUG_BREAK_LINES && do {
$Msg .=
" :last at leading_alignment='$leading_alignment_type'";
};
last;
}
# Force at least one breakpoint if old code had good
# break It is only called if a breakpoint is required or
# desired. This will probably need some adjustments
# over time. A goal is to try to be sure that, if a new
# side comment is introduced into formatted text, then
# the same breakpoints will occur. scbreak.t
if (
$i_test == $imax # we are at the end
&& !$forced_breakpoint_count
&& $saw_good_break # old line had good break
&& $type =~ /^[#;\{]$/ # and this line ends in
# ';' or side comment
&& $i_last_break < 0 # and we haven't made a break
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $imax - 1 # (but not just before this ;)
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
)
{
DEBUG_BREAK_LINES && do {
$Msg .= " :last at good old break\n";
};
last;
}
# Do not skip past an important break point in a short final
# segment. For example, without this check we would miss the
# break at the final / in the following code:
#
# $depth_stop =
# ( $tau * $mass_pellet * $q_0 *
# ( 1. - exp( -$t_stop / $tau ) ) -
# 4. * $pi * $factor * $k_ice *
# ( $t_melt - $t_ice ) *
# $r_pellet *
# $t_stop ) /
# ( $rho_ice * $Qs * $pi * $r_pellet**2 );
#
if (
$line_count > 2
&& $i_lowest >= 0 # and we saw a possible break
&& $i_lowest < $i_test
&& $i_test > $imax - 2
&& $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_lowest]
&& $lowest_strength < $last_break_strength - .5 * WEAK
)
{
# Make this break for math operators for now
my $ir = $inext_to_go[$i_lowest];
my $il = iprev_to_go($ir);
if ( $types_to_go[$il] =~ /^[\/\*\+\-\%]$/
|| $types_to_go[$ir] =~ /^[\/\*\+\-\%]$/ )
{
DEBUG_BREAK_LINES && do {
$Msg .= " :last-noskip_short";
};
last;
}
}
# Update the minimum bond strength location
$lowest_strength = $strength;
$i_lowest = $i_test;
if ($must_break) {
DEBUG_BREAK_LINES && do {
$Msg .= " :last-must_break";
};
last;
}
# set flags to remember if a break here will produce a
# leading alignment of certain common tokens
if ( $line_count > 0
&& $i_test < $imax
&& ( $lowest_strength - $last_break_strength <= MAX_BIAS ) )
{
my $i_last_end = iprev_to_go($i_begin);
my $tok_beg = $tokens_to_go[$i_begin];
my $type_beg = $types_to_go[$i_begin];
if (
# check for leading alignment of certain tokens
(
$tok_beg eq $next_nonblank_token
&& $is_chain_operator{$tok_beg}
&& ( $type_beg eq 'k'
|| $type_beg eq $tok_beg )
&& $nesting_depth_to_go[$i_begin] >=
$nesting_depth_to_go[$i_next_nonblank]
)
|| ( $tokens_to_go[$i_last_end] eq $token
&& $is_chain_operator{$token}
&& ( $type eq 'k' || $type eq $token )
&& $nesting_depth_to_go[$i_last_end] >=
$nesting_depth_to_go[$i_test] )
)
{
$leading_alignment_type = $next_nonblank_type;
}
}
}
#-----------------------------------------------------------
# Section D: See if the maximum line length will be exceeded
#-----------------------------------------------------------
# Quit if there are no more tokens to test
last if ( $i_test >= $imax );
# Keep going if we have not reached the limit
my $excess =
$leading_spaces +
$summed_lengths_to_go[ $i_test + 2 ] -
$starting_sum -
$maximum_line_length;
if ( $excess < 0 ) {
next;
}
elsif ( $excess == 0 ) {
# To prevent blinkers we will avoid leaving a token exactly at
# the line length limit unless it is the last token or one of
# several "good" types.
#
# The following code was a blinker with -pbp before this
# modification:
# $last_nonblank_token eq '('
# && $is_indirect_object_taker{ $paren_type
# [$paren_depth] }
# The issue causing the problem is that if the
# term [$paren_depth] gets broken across a line then
# the whitespace routine doesn't see both opening and closing
# brackets and will format like '[ $paren_depth ]'. This
# leads to an oscillation in length depending if we break
# before the closing bracket or not.
if ( $i_test + 1 < $imax
&& $next_nonblank_type ne ','
&& !$is_closing_type{$next_nonblank_type} )
{
# too long
DEBUG_BREAK_LINES && do {
$Msg .= " :too_long";
}
}
else {
next;
}
}
else {
# too long
}
# a break here makes the line too long ...
DEBUG_BREAK_LINES && do {
my $ltok = $token;
my $rtok =
$next_nonblank_token ? $next_nonblank_token : EMPTY_STRING;
my $i_testp2 = $i_test + 2;
if ( $i_testp2 > $max_index_to_go + 1 ) {
$i_testp2 = $max_index_to_go + 1;
}
if ( length($ltok) > 6 ) { $ltok = substr( $ltok, 0, 8 ) }
if ( length($rtok) > 6 ) { $rtok = substr( $rtok, 0, 8 ) }
print {*STDOUT}
"BREAK: i=$i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type sp=($leading_spaces) lnext= $summed_lengths_to_go[$i_testp2] str=$strength $ltok $rtok\n";
};
# Exception: allow one extra terminal token after exceeding line length
# if it would strand this token.
if ( $i_lowest == $i_test
&& $token_lengths_to_go[$i_test] > 1
&& ( $next_nonblank_type eq ';' || $next_nonblank_type eq ',' )
&& $rOpts_fuzzy_line_length )
{
DEBUG_BREAK_LINES && do {
$Msg .= " :do_not_strand next='$next_nonblank_type'";
};
next;
}
# Stop if here if we have a solution and the line will be too long
if ( $i_lowest >= 0 ) {
DEBUG_BREAK_LINES && do {
$Msg .=
" :Done-too_long && i_lowest=$i_lowest at itest=$i_test, imax=$imax";
};
last;
}
} ## end while ( ++$i_test <= $imax)
#-----------------------------------------------------
# End INNER_LOOP over the indexes in the _to_go arrays
#-----------------------------------------------------
# Be sure we return an index in the range ($ibegin .. $imax).
# We will break at imax if no other break was found.
if ( $i_lowest < 0 ) { $i_lowest = $imax }
return ( $i_lowest, $lowest_strength, $Msg );
} ## end sub break_lines_inner_loop
sub do_colon_breaks {
my ( $self, $ri_colon_breaks, $ri_first, $ri_last ) = @_;
# Given:
# $ri_colon_breaks = ref to list of indexes breaks at ':' tokens
# $ri_first - reference to current list of the first index $i for each
# output line in this batch
# $ri_last - reference to current list of the last index $i for each
# output line in this batch
# Add additional breaks if we are in a ?/: chain.
# Simplified method used here: This is a ?/: chain if it has
# multiple ?/: pairs all in order; otherwise not.
my @insert_list = ();
foreach ( @{$ri_colon_breaks} ) {
my $i_question = $mate_index_to_go[$_];
if ( defined($i_question) ) {
if ( $want_break_before{'?'} ) {
$i_question = iprev_to_go($i_question);
}
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
$self->insert_additional_breaks( \@insert_list, $ri_first, $ri_last );
}
return;
} ## end sub do_colon_breaks
###########################################
# CODE SECTION 11: Code to break long lists
###########################################
{ ## begin closure break_lists
# These routines and variables are involved in finding good
# places to break long lists.
my (
$block_type,
$current_depth,
$depth,
$i_last_colon,
$i_line_end,
$i_line_start,
$i_last_nonblank_token,
$last_nonblank_block_type,
$last_nonblank_token,
$last_nonblank_type,
$last_old_breakpoint_count,
$minimum_depth,
$next_nonblank_block_type,
$next_nonblank_token,
$next_nonblank_type,
$old_breakpoint_count,
$starting_breakpoint_count,
$starting_depth,
$token,
$type,
$type_sequence,
);
my (
@breakpoint_stack,
@breakpoint_undo_stack,
@comma_index,
@container_type,
@identifier_count_stack,
@index_before_arrow,
@interrupted_list,
@item_count_stack,
@last_comma_index,
@last_dot_index,
@last_nonblank_type_stack,
@old_breakpoint_count_stack,
@opening_structure_index_stack,
@rfor_semicolon_list,
@has_old_logical_breakpoints,
@rand_or_list,
@i_equals,
@override_cab3,
@type_sequence_stack,
);
# these arrays must retain values between calls
my ( @has_broken_sublist, @dont_align, @want_comma_break );
my $length_tol;
my $lp_tol_boost;
sub initialize_break_lists {
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
#---------------------------------------------------
# Set tolerances to prevent formatting instabilities
#---------------------------------------------------
# Define tolerances to use when checking if closed
# containers will fit on one line. This is necessary to avoid
# formatting instability. The basic tolerance is based on the
# following:
# - Always allow for at least one extra space after a closing token so
# that we do not strand a comma or semicolon. (oneline.t).
# - Use an increased line length tolerance when -ci > -i to avoid
# blinking states (case b923 and others).
$length_tol =
1 + max( 0, $rOpts_continuation_indentation - $rOpts_indent_columns );
# In addition, it may be necessary to use a few extra tolerance spaces
# when -lp is used and/or when -xci is used. The history of this
# so far is as follows:
# FIX1: At least 3 characters were been found to be required for -lp
# to fixes cases b1059 b1063 b1117.
# FIX2: Further testing showed that we need a total of 3 extra spaces
# when -lp is set for non-lists, and at least 2 spaces when -lp and
# -xci are set.
# Fixes cases b1063 b1103 b1134 b1135 b1136 b1138 b1140 b1143 b1144
# b1145 b1146 b1147 b1148 b1151 b1152 b1153 b1154 b1156 b1157 b1164
# b1165
# FIX3: To fix cases b1169 b1170 b1171, an update was made in sub
# 'find_token_starting_list' to go back before an initial blank space.
# This fixed these three cases, and allowed the tolerances to be
# reduced to continue to fix all other known cases of instability.
# This gives the current tolerance formulation.
$lp_tol_boost = 0;
if ($rOpts_line_up_parentheses) {
# boost tol for combination -lp -xci
if ($rOpts_extended_continuation_indentation) {
$lp_tol_boost = 2;
# and one more for -lp -xci -vmll (b1470, b1474, b1266)
if ($rOpts_variable_maximum_line_length) {
$lp_tol_boost = max( 2, $rOpts_indent_columns );
}
}
# boost tol for combination -lp and any -vtc > 0, but only for
# non-list containers
else {
foreach ( keys %closing_vertical_tightness ) {
next
unless ( $closing_vertical_tightness{$_} );
$lp_tol_boost = 1; # Fixes B1193;
last;
}
}
}
# Define a level where list formatting becomes highly stressed and
# needs to be simplified. Introduced for case b1262.
# $list_stress_level = min($stress_level_alpha, $stress_level_beta + 2);
# This is now '$high_stress_level'.
return;
} ## end sub initialize_break_lists
# routine to define essential variables when we go 'up' to
# a new depth
sub check_for_new_minimum_depth {
my ( $self, $depth_t, $seqno ) = @_;
# Initialize for a new minimum depth
# Given:
# $depth_t = new depth
# $seqno = sequence number of the parent container
if ( $depth_t < $minimum_depth ) {
$minimum_depth = $depth_t;
# these arrays need not retain values between calls
my $old_seqno = $type_sequence_stack[$depth_t];
my $changed_seqno = !defined($old_seqno) || $old_seqno != $seqno;
$type_sequence_stack[$depth_t] = $seqno;
$override_cab3[$depth_t] = undef;
if ( $rOpts_comma_arrow_breakpoints == 3 && $seqno ) {
$override_cab3[$depth_t] = $self->[_roverride_cab3_]->{$seqno};
}
$breakpoint_stack[$depth_t] = $starting_breakpoint_count;
$container_type[$depth_t] = EMPTY_STRING;
$identifier_count_stack[$depth_t] = 0;
$index_before_arrow[$depth_t] = -1;
$interrupted_list[$depth_t] = 1;
$item_count_stack[$depth_t] = 0;
$last_nonblank_type_stack[$depth_t] = EMPTY_STRING;
$opening_structure_index_stack[$depth_t] = -1;
$breakpoint_undo_stack[$depth_t] = undef;
$comma_index[$depth_t] = undef;
$last_comma_index[$depth_t] = undef;
$last_dot_index[$depth_t] = undef;
$old_breakpoint_count_stack[$depth_t] = undef;
$has_old_logical_breakpoints[$depth_t] = 0;
$rand_or_list[$depth_t] = [];
$rfor_semicolon_list[$depth_t] = [];
$i_equals[$depth_t] = -1;
# these arrays must retain values between calls
if ( $changed_seqno || !defined( $has_broken_sublist[$depth_t] ) ) {
$dont_align[$depth_t] = 0;
$has_broken_sublist[$depth_t] = 0;
$want_comma_break[$depth_t] = 0;
}
}
return;
} ## end sub check_for_new_minimum_depth
sub set_comma_breakpoints {
my ( $self, $i, $dd, $rbond_strength_bias ) = @_;
# Decide which commas to break at within a container
# Given:
# $i = index of current token in main loop over tokens, or
# = $max_index_to_go + 1 for post-loop operations (c410)
# $dd = stack depth
# $rbond-strength_bias = ref to array of bond strength biases which
# may be updated for commas not in lists
# Return:
# $bp_count = number of comma breakpoints set
# $do_not_break_apart = a flag indicating if container need not
# be broken open
my $bp_count = 0;
my $do_not_break_apart = 0;
# anything to do?
if ( $item_count_stack[$dd] ) {
# Do not break a list unless there are some non-line-ending commas.
# This avoids getting different results with only non-essential
# commas, and fixes b1192.
my $seqno = $type_sequence_stack[$dd];
my $real_comma_count =
$seqno ? $self->[_rtype_count_by_seqno_]->{$seqno}->{','} : 1;
# handle commas not in containers...
if ( $dont_align[$dd] ) {
$self->do_uncontained_comma_breaks( $dd, $rbond_strength_bias );
}
# handle commas within containers...
elsif ($real_comma_count) {
my $fbc = $forced_breakpoint_count;
# always open comma lists not preceded by keywords,
# barewords, identifiers (that is, anything that doesn't
# look like a function call)
# c250: added new sub identifier type 'S'
my $must_break_open =
$last_nonblank_type_stack[$dd] !~ /^[kwiUS]$/;
$self->table_maker(
{
## depth => $dd,
i_opening_paren => $opening_structure_index_stack[$dd],
i_closing_paren => $i,
item_count => $item_count_stack[$dd],
identifier_count => $identifier_count_stack[$dd],
rcomma_index => $comma_index[$dd],
next_nonblank_type => $next_nonblank_type,
list_type => $container_type[$dd],
interrupted => $interrupted_list[$dd],
rdo_not_break_apart => \$do_not_break_apart,
must_break_open => $must_break_open,
has_broken_sublist => $has_broken_sublist[$dd],
}
);
$bp_count = $forced_breakpoint_count - $fbc;
$do_not_break_apart = 0 if $must_break_open;
}
else {
## no real commas, nothing to do
}
}
return ( $bp_count, $do_not_break_apart );
} ## end sub set_comma_breakpoints
# These types are excluded at breakpoints to prevent blinking
# Switched from excluded to included as part of fix for b1214
my %is_uncontained_comma_break_included_type;
BEGIN {
my @q = qw# k R } ) ] Y Z U w i q Q .
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= #;
@is_uncontained_comma_break_included_type{@q} = (1) x scalar(@q);
} ## end BEGIN
sub do_uncontained_comma_breaks {
my ( $self, $dd, $rbond_strength_bias ) = @_;
# Handle commas not in containers
# Given:
# $dd = depth of this layer of commas
# $rbond_strength_bias = array of bond strengths to be updated
# This is a catch-all routine for commas that we
# don't know what to do with because the don't fall
# within containers. We will bias the bond strength
# to break at commas which ended lines in the input
# file. This usually works better than just trying
# to put as many items on a line as possible. A
# downside is that if the input file is garbage it
# won't work very well. However, the user can always
# prevent following the old breakpoints with the
# -iob flag.
# Check added for issue c131; an error here would be due to an
# error initializing @comma_index when entering depth $dd.
if (DEVEL_MODE) {
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $ii < 0 || $ii > $max_index_to_go ) {
my $KK = $K_to_go[0];
my $lno = $self->[_rLL_]->[$KK]->[_LINE_INDEX_];
Fault(<<EOM);
Bad comma index near line $lno: i=$ii must be between 0 and $max_index_to_go
EOM
}
}
}
my $bias = -.01;
my $old_comma_break_count = 0;
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $old_breakpoint_to_go[$ii] ) {
$old_comma_break_count++;
# Store the bias info for use by sub set_bond_strength
push @{$rbond_strength_bias}, [ $ii, $bias ];
# reduce bias magnitude to force breaks in order
$bias *= 0.99;
}
}
# Also put a break before the first comma if
# (1) there was a break there in the input, and
# (2) there was exactly one old break before the first comma break
# (3) OLD: there are multiple old comma breaks
# (3) NEW: there are one or more old comma breaks (see return example)
# (4) the first comma is at the starting level ...
# ... fixes cases b064 b065 b068 b210 b747
# (5) the batch does not start with a ci>0 [ignore a ci change by -xci]
# ... fixes b1220. If ci>0 we are in the middle of a snippet,
# maybe because -boc has been forcing out previous lines.
# For example, we will follow the user and break after
# 'print' in this snippet:
# print
# "conformability (Not the same dimension)\n",
# "\t", $have, " is ", text_unit($hu), "\n",
# "\t", $want, " is ", text_unit($wu), "\n",
# ;
#
# Another example, just one comma, where we will break after
# the return:
# return
# $x * cos($a) - $y * sin($a),
# $x * sin($a) + $y * cos($a);
# Breaking a print statement:
# print SAVEOUT
# ( $? & 127 ) ? " (SIG#" . ( $? & 127 ) . ")" : "",
# ( $? & 128 ) ? " -- core dumped" : "", "\n";
#
# But we will not force a break after the opening paren here
# (causes a blinker):
# $heap->{stream}->set_output_filter(
# poe::filter::reference->new('myotherfreezer') ),
# ;
#
my $i_first_comma = $comma_index[$dd]->[0];
my $level_comma = $levels_to_go[$i_first_comma];
my $ci_start = $ci_levels_to_go[0];
# Here we want to use the value of ci before any -xci adjustment
if ( $ci_start && $rOpts_extended_continuation_indentation ) {
my $K0 = $K_to_go[0];
if ( $self->[_rseqno_controlling_my_ci_]->{$K0} ) { $ci_start = 0 }
}
if ( !$ci_start
&& $old_breakpoint_to_go[$i_first_comma]
&& $level_comma == $levels_to_go[0] )
{
my $ibreak = -1;
my $obp_count = 0;
foreach my $ii ( reverse( 0 .. $i_first_comma - 1 ) ) {
if ( $old_breakpoint_to_go[$ii] ) {
$obp_count++;
last if ( $obp_count > 1 );
$ibreak = $ii
if ( $levels_to_go[$ii] == $level_comma );
}
}
# Changed rule from multiple old commas to just one here:
if ( $ibreak >= 0 && $obp_count == 1 && $old_comma_break_count > 0 )
{
my $ibreak_m = $ibreak;
$ibreak_m-- if ( $types_to_go[$ibreak_m] eq 'b' );
if ( $ibreak_m >= 0 ) {
# In order to avoid blinkers we have to be fairly
# restrictive:
# OLD Rules:
# Rule 1: Do not to break before an opening token
# Rule 2: avoid breaking at ternary operators
# (see b931, which is similar to the above print example)
# Rule 3: Do not break at chain operators to fix case b1119
# - The previous test was '$typem !~ /^[\(\{\[L\?\:]$/'
# NEW Rule, replaced above rules after case b1214:
# only break at one of the included types
# Be sure to test any changes to these rules against runs
# with -l=0 such as the 'bbvt' test (perltidyrc_colin)
# series.
my $type_m = $types_to_go[$ibreak_m];
# Switched from excluded to included for b1214. If necessary
# the token could also be checked if type_m eq 'k'
if ( $is_uncontained_comma_break_included_type{$type_m} ) {
# Rule added to fix b1449:
# Do not break before a '?' if -nbot is set
# Otherwise, we may alternately arrive here and
# set the break, or not, depending on the input.
my $no_break;
my $ibreak_p = $inext_to_go[$ibreak_m];
if ( !$rOpts_break_at_old_ternary_breakpoints
&& $ibreak_p <= $max_index_to_go )
{
my $type_p = $types_to_go[$ibreak_p];
$no_break = $type_p eq '?';
}
$self->set_forced_breakpoint($ibreak)
if ( !$no_break );
}
}
}
}
return;
} ## end sub do_uncontained_comma_breaks
my %is_logical_container;
my %quick_filter_A;
my %quick_filter_B;
BEGIN {
my @q = qw# if elsif unless while and or err not && | || ? : ! #;
@is_logical_container{@q} = (1) x scalar(@q);
# Filters to allow most tokens to skip past tedious if-elsif blocks
%quick_filter_A = %is_assignment;
@q = qw( || && f k );
@quick_filter_A{@q} = (1) x scalar(@q);
%quick_filter_B = %is_assignment;
@q = qw# => . ; < > ~ #;
push @q, ',';
push @q, 'f'; # added for ';' for issue c154
@quick_filter_B{@q} = (1) x scalar(@q);
} ## end BEGIN
sub set_for_semicolon_breakpoints {
my ( $self, $dd ) = @_;
# Given:
# $dd = depth of this layer
# Set breakpoints for semicolons in C-style 'for' containers
foreach ( @{ $rfor_semicolon_list[$dd] } ) {
$self->set_forced_breakpoint($_);
}
return;
} ## end sub set_for_semicolon_breakpoints
sub set_logical_breakpoints {
my ( $self, $dd ) = @_;
# Given:
# $dd = depth of this layer
# Set breakpoints at logical operators
if (
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
|| $has_old_logical_breakpoints[$dd]
)
{
# Look for breaks in this order:
# 0 1 2 3
# or and || &&
foreach my $ii ( 0 .. 3 ) {
if ( $rand_or_list[$dd][$ii] ) {
foreach ( @{ $rand_or_list[$dd][$ii] } ) {
$self->set_forced_breakpoint($_);
}
# break at any 'if' and 'unless' too
foreach ( @{ $rand_or_list[$dd][4] } ) {
$self->set_forced_breakpoint($_);
}
$rand_or_list[$dd] = [];
last;
}
}
}
return;
} ## end sub set_logical_breakpoints
sub is_unbreakable_container {
my $dd = shift;
# Given:
# $dd = depth of this layer
# Return:
# true if the container should not be broken
# false otherwise
# never break a container of one of these types
# because bad things can happen (map1.t):
return $is_sort_map_grep{ $container_type[$dd] };
} ## end sub is_unbreakable_container
sub break_lists {
my ( $self, $is_long_line, $rbond_strength_bias ) = @_;
# This routine is called once per batch, if the batch is a list, to
# set line breaks so that hierarchical structure can be displayed and
# so that list items can be vertically aligned.
# Given:
# $is_long_line = true if this batch requires multiple output lines
# $rbond_strength_bias = array of bond strengths to be updated
# Task:
# Update the array @forced_breakpoint_to_go with breakpoints.
# This array is used by sub 'break_long_lines' to set final
# breakpoints.
# This is probably the most complex routine in perltidy,
# so I have broken it into pieces and over-commented it.
$starting_depth = $nesting_depth_to_go[0];
$block_type = SPACE;
$current_depth = $starting_depth;
$i_last_colon = -1;
$i_line_end = -1;
$i_line_start = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_nonblank_block_type = SPACE;
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; # forces update in check below
$old_breakpoint_count = 0;
$starting_breakpoint_count = $forced_breakpoint_count;
$token = ';';
$type = ';';
$type_sequence = EMPTY_STRING;
my $total_depth_variation = 0;
my $i_old_assignment_break;
my $depth_last = $starting_depth;
my $comma_follows_last_closing_token;
$self->check_for_new_minimum_depth( $current_depth,
$parent_seqno_to_go[0] )
if ( $current_depth < $minimum_depth );
my $i_want_previous_break = -1;
my $saw_good_breakpoint;
#----------------------------------------
# Main loop over all tokens in this batch
#----------------------------------------
foreach my $i ( 0 .. $max_index_to_go ) {
if ( $type ne 'b' ) {
$i_last_nonblank_token = $i - 1;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
$last_nonblank_block_type = $block_type;
}
# set break if flag was set
if ( $i_want_previous_break >= 0 ) {
$self->set_forced_breakpoint($i_want_previous_break);
$i_want_previous_break = -1;
}
$type = $types_to_go[$i];
next if ( $type eq 'b' );
$last_old_breakpoint_count = $old_breakpoint_count;
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
$type_sequence = $type_sequence_to_go[$i];
my $i_next_nonblank = $inext_to_go[$i];
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
$next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
#-------------------------------------------
# Loop Section A: Look for special breakpoints...
#-------------------------------------------
# Check for a good old breakpoint ..
if ( $old_breakpoint_to_go[$i] ) {
( $i_want_previous_break, $i_old_assignment_break ) =
$self->examine_old_breakpoint( $i, $i_next_nonblank,
$i_want_previous_break, $i_old_assignment_break );
}
$depth = $nesting_depth_to_go[ $i + 1 ];
$total_depth_variation += abs( $depth - $depth_last );
$depth_last = $depth;
# safety check - be sure we always break after a comment
# Shouldn't happen .. an error here probably means that the
# nobreak flag did not get turned off correctly during
# formatting.
if ( $type eq '#' ) {
if ( $i != $max_index_to_go ) {
if (DEVEL_MODE) {
Fault(<<EOM);
Non-fatal program bug: backup logic required to break after a comment
EOM
}
$nobreak_to_go[$i] = 0;
$self->set_forced_breakpoint($i);
} ## end if ( $i != $max_index_to_go)
} ## end if ( $type eq '#' )
# Force breakpoints at certain tokens in long lines.
# Note that such breakpoints will be undone later if these tokens
# are fully contained within parens on a line.
if (
# break before a keyword within a line
$type eq 'k'
&& $i > 0
# if one of these keywords:
&& $is_if_unless_while_until_for_foreach{$token}
# but do not break at something like '1 while'
&& ( $last_nonblank_type ne 'n' || $i > 2 )
# and let keywords follow a closing 'do' brace
&& ( !$last_nonblank_block_type
|| $last_nonblank_block_type ne 'do' )
&& (
$is_long_line
# or container is broken (by side-comment, etc)
|| (
$next_nonblank_token eq '('
&& ( !defined( $mate_index_to_go[$i_next_nonblank] )
|| $mate_index_to_go[$i_next_nonblank] < $i )
)
)
)
{
$self->set_forced_breakpoint( $i - 1 );
}
# remember locations of '||' and '&&' for possible breaks if we
# decide this is a long logical expression.
if ( $quick_filter_A{$type} ) {
if ( $type eq '||' ) {
push @{ $rand_or_list[$depth][2] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
elsif ( $type eq 'k' ) {
if ( $token eq 'and' ) {
push @{ $rand_or_list[$depth][1] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
# break immediately at 'or's which are probably not in a
# logical block -- but we will break in logical breaks
# below so that they do not add to the
# forced_breakpoint_count
elsif ( $token eq 'or' ) {
push @{ $rand_or_list[$depth][0] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
if ( $is_logical_container{ $container_type[$depth] } )
{
}
else {
if ($is_long_line) {
$self->set_forced_breakpoint($i);
}
elsif ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$saw_good_breakpoint = 1;
}
else {
## not a good break
}
}
}
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$self->set_forced_breakpoint($i);
}
}
else {
## not one of: 'and' 'or' 'if' 'unless'
}
}
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
else {
# error : no code to handle a type in %quick_filter_A
DEVEL_MODE && Fault(<<EOM);
Missing code to handle token type '$type' which is in the quick_filter_A
EOM
}
}
if ($type_sequence) {
#-----------------------------------------
# Loop Section B: Handle a sequenced token
#-----------------------------------------
$self->break_lists_type_sequence($i);
#------------------------------------------
# Loop Section C: Handle Increasing Depth..
#------------------------------------------
# hardened against bad input syntax: depth jump must be 1 and
# type must be opening..fixes c102
if ( $depth == $current_depth + 1 && $is_opening_type{$type} ) {
$self->break_lists_increasing_depth($i);
}
#------------------------------------------
# Loop Section D: Handle Decreasing Depth..
#------------------------------------------
# hardened against bad input syntax: depth jump must be 1 and
# type must be closing .. fixes c102
elsif ($depth == $current_depth - 1
&& $is_closing_type{$type} )
{
# Note that $rbond_strength_bias will not get changed by
# this call. It gets changed in the call to
# set_comma_breakpoints at the end of this routine for
# commas not in lists.
$self->break_lists_decreasing_depth( $i,
$rbond_strength_bias );
$comma_follows_last_closing_token =
$next_nonblank_type eq ',' || $next_nonblank_type eq '=>';
}
else {
## not a depth change
}
}
#----------------------------------
# Loop Section E: Handle this token
#----------------------------------
$current_depth = $depth;
# most token types can skip the rest of this loop
next if ( !$quick_filter_B{$type} );
# Turn off comma alignment if we are sure that this is not a list
# environment. To be safe, we will do this if we see certain
# non-list tokens, such as ';', '=', and also the environment is
# not a list.
## $type =~ /^[\;\<\>\~f]$/ || $is_assignment{$type}
if ( $is_non_list_type{$type} ) {
if ( !$self->is_in_list_by_i($i) ) {
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# no special comma breaks in C-style 'for' terms (c154)
if ( $type eq 'f' ) { $last_comma_index[$depth] = undef }
}
}
# handle any commas
elsif ( $type eq ',' ) {
$self->study_comma( $i, $comma_follows_last_closing_token );
}
# handle comma-arrow
elsif ( $type eq '=>' ) {
next if ( $last_nonblank_type eq '=>' );
next if $rOpts_break_at_old_comma_breakpoints;
next
if ( $rOpts_comma_arrow_breakpoints == 3
&& !defined( $override_cab3[$depth] ) );
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
}
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
}
else {
# error : no code to handle a type in %quick_filter_B
DEVEL_MODE && Fault(<<EOM);
Missing code to handle token type '$type' which is in the quick_filter_B
EOM
}
} ## end main loop over tokens
#----------------------------------------
# Now set breaks for any unfinished lists
#----------------------------------------
foreach my $dd ( reverse( $minimum_depth .. $current_depth ) ) {
$interrupted_list[$dd] = 1;
$has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
if ( $item_count_stack[$dd] ) {
$self->set_comma_breakpoints( $max_index_to_go + 1,
$dd, $rbond_strength_bias );
}
$self->set_logical_breakpoints($dd)
if ( $has_old_logical_breakpoints[$dd] );
$self->set_for_semicolon_breakpoints($dd)
if ( @{ $rfor_semicolon_list[$dd] } );
# break open container...
my $i_opening = $opening_structure_index_stack[$dd];
if (
defined($i_opening)
&& $i_opening >= 0
&& !is_unbreakable_container($dd)
# Avoid a break which would place an isolated ' or "
# on a line
&& !(
$type eq 'Q'
&& $i_opening >= $max_index_to_go - 2
&& ( $token eq "'" || $token eq '"' )
)
)
{
$self->set_forced_breakpoint($i_opening);
}
} ## end for ( my $dd = $current_depth...)
#------------------------------------------------
# Set the return the flag '$saw_good_breakpoint'.
#------------------------------------------------
# This flag indicates if the input file had some good breakpoints.
# It will be used to force a break in a line shorter than the
# allowed line length.
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
# A complex line with one break at an = has a good breakpoint.
# This is not complex ($total_depth_variation=0):
# $res1
# = 10;
#
# This is complex ($total_depth_variation=6):
# $res2 =
# (is_boundp("a", 'self-insert') && is_boundp("b", 'self-insert'));
# The check ($i_old_.. < $max_index_to_go) was added to fix b1333
elsif ($i_old_assignment_break
&& $total_depth_variation > 4
&& $old_breakpoint_count == 1
&& $i_old_assignment_break < $max_index_to_go )
{
$saw_good_breakpoint = 1;
}
else {
## not a good breakpoint
}
return $saw_good_breakpoint;
} ## end sub break_lists
sub study_comma {
my ( $self, $i, $comma_follows_last_closing_token ) = @_;
# Study and store info for a list comma
# Given:
# $i = index of this comma in the _to_go output batch array
# $comma_follows_last_closing_token = true if it follows ')' '}' or ']'
$last_dot_index[$depth] = undef;
$last_comma_index[$depth] = $i;
# break here if this comma follows a '=>'
# but not if there is a side comment after the comma
if ( $want_comma_break[$depth] ) {
if ( $is_closing_type{$next_nonblank_type} ) {
if ($rOpts_comma_arrow_breakpoints) {
$want_comma_break[$depth] = 0;
return;
}
}
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
# break before the previous token if it looks safe
# Example of something that we will not try to break before:
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
# Also we don't want to break at a binary operator (like +):
# $c->createOval(
# $x + $R, $y +
# $R => $x - $R,
# $y - $R, -fill => 'black',
# );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& !$is_closing_token{ $tokens_to_go[ $ibreak + 1 ] } )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
# don't break before a comma, as in the following:
# ( LONGER_THAN,=> 1,
# EIGHTY_CHARACTERS,=> 2,
# CAUSES_FORMATTING,=> 3,
# LIKE_THIS,=> 4,
# );
# This example is for -tso but should be general rule
if ( $types_to_go[ $ibreak + 1 ] ne '->'
&& $types_to_go[ $ibreak + 1 ] ne ',' )
{
$self->set_forced_breakpoint($ibreak);
}
}
}
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
# handle list which mixes '=>'s and ','s:
# treat any list items so far as an interrupted list
$interrupted_list[$depth] = 1;
return;
}
# Break after all commas above starting depth...
# But only if the last closing token was followed by a comma,
# to avoid breaking a list operator (issue c119)
if ( $depth < $starting_depth
&& $comma_follows_last_closing_token
&& !$dont_align[$depth] )
{
$self->set_forced_breakpoint($i)
unless ( $next_nonblank_type eq '#' );
return;
}
# add this comma to the list..
my $item_count = $item_count_stack[$depth];
if ( $item_count == 0 ) {
# but do not form a list with no opening structure
# for example:
# open INFILE_COPY, ">$input_file_copy"
# or die ("very long message");
if ( ( $opening_structure_index_stack[$depth] < 0 )
&& $self->is_in_block_by_i($i) )
{
$dont_align[$depth] = 1;
}
}
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
return;
} ## end sub study_comma
my %poor_types;
my %poor_keywords;
my %poor_next_types;
my %poor_next_keywords;
BEGIN {
# Setup filters for detecting very poor breaks to ignore.
# b1097: old breaks after type 'L' and before 'R' are poor
# b1450: old breaks at 'eq' and related operators are poor
my @q = qw( == <= >= != );
@poor_types{@q} = (1) x scalar(@q);
@poor_next_types{@q} = (1) x scalar(@q);
$poor_types{'L'} = 1;
$poor_next_types{'R'} = 1;
@q = qw( eq ne le ge lt gt );
@poor_keywords{@q} = (1) x scalar(@q);
@poor_next_keywords{@q} = (1) x scalar(@q);
} ## end BEGIN
sub examine_old_breakpoint {
my ( $self, $i, $i_next_nonblank, $i_want_previous_break,
$i_old_assignment_break )
= @_;
# Look at an old breakpoint and set/update certain flags:
# Given indexes of three tokens in this batch:
# $i_next_nonblank - index of the next nonblank token
# $i_want_previous_break - we want a break before this index
# $i_old_assignment_break - the index of an '=' or equivalent
# Update:
# $old_breakpoint_count - a counter to increment unless poor break
# Update and return:
# $i_want_previous_break
# $i_old_assignment_break
#-----------------------
# Filter out poor breaks
#-----------------------
# Just return if this is a poor break and pretend it does not exist.
# Otherwise, poor breaks made under stress can cause instability.
my $poor_break;
if ( $type eq 'k' ) { $poor_break ||= $poor_keywords{$token} }
else { $poor_break ||= $poor_types{$type} }
if ( $next_nonblank_type eq 'k' ) {
$poor_break ||= $poor_next_keywords{$next_nonblank_token};
}
else {
$poor_break ||= $poor_next_types{$next_nonblank_type};
}
# Also ignore any high stress level breaks; fixes b1395
$poor_break ||= $levels_to_go[$i] >= $high_stress_level;
if ($poor_break) { goto RETURN }
#--------------------------------------------
# Not a poor break, so continue to examine it
#--------------------------------------------
$old_breakpoint_count++;
$i_line_end = $i;
$i_line_start = $i_next_nonblank;
#---------------------------------------
# Do we want to break before this token?
#---------------------------------------
# Break before certain keywords if user broke there and
# this is a 'safe' break point. The idea is to retain
# any preferred breaks for sequential list operations,
# like a schwartzian transform.
if ($rOpts_break_at_old_keyword_breakpoints) {
if (
$next_nonblank_type eq 'k'
&& $is_keyword_returning_list{$next_nonblank_token}
&& ( $type =~ /^[=\)\]\}Riw]$/
|| $type eq 'k' && $is_keyword_returning_list{$token} )
)
{
# we actually have to set this break next time through
# the loop because if we are at a closing token (such
# as '}') which forms a one-line block, this break might
# get undone.
# But do not do this at an '=' if:
# - the user wants breaks before an equals (b434 b903)
# - or -naws is set (can be unstable, see b1354)
my $skip = $type eq '='
&& ( $want_break_before{$type}
|| !$rOpts_add_whitespace );
$i_want_previous_break = $i
unless ($skip);
}
}
# Break before attributes if user broke there
if ($rOpts_break_at_old_attribute_breakpoints) {
if ( $next_nonblank_type eq 'A' ) {
$i_want_previous_break = $i;
}
}
#---------------------------------
# Is this an old assignment break?
#---------------------------------
if ( $is_assignment{$type} ) {
$i_old_assignment_break = $i;
}
elsif ( $is_assignment{$next_nonblank_type} ) {
$i_old_assignment_break = $i_next_nonblank;
}
else {
## not old assignment break
}
RETURN:
return ( $i_want_previous_break, $i_old_assignment_break );
} ## end sub examine_old_breakpoint
sub break_lists_type_sequence {
my ( $self, $i ) = @_;
# We have encountered a sequenced token while setting list breakpoints
# Given:
# $i = index of this token in the _to_go output batch array
# if closing type, one of } ) ] :
if ( $is_closing_sequence_token{$token} ) {
if ( $type eq ':' ) {
$i_last_colon = $i;
# retain break at a ':' line break
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_ternary_breakpoints
&& $levels_to_go[$i] < $high_stress_level )
{
$self->set_forced_breakpoint($i);
# Break at a previous '=', but only if it is before
# the mating '?'. Mate_index test fixes b1287.
my $ieq = $i_equals[$depth];
my $mix = $mate_index_to_go[$i];
if ( !defined($mix) ) { $mix = -1 }
if ( $ieq > 0 && $ieq < $mix ) {
$self->set_forced_breakpoint( $i_equals[$depth] );
$i_equals[$depth] = -1;
}
}
}
# handle any postponed closing breakpoints
if ( has_postponed_breakpoint($type_sequence) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
if ( $i >= $inc ) {
$self->set_forced_breakpoint( $i - $inc );
}
}
}
# must be opening token, one of { ( [ ?
else {
# set breaks at ?/: if they will get separated (and are
# not a ?/: chain), or if the '?' is at the end of the
# line
if ( $token eq '?' ) {
my $i_colon = $mate_index_to_go[$i];
if (
!defined($i_colon) # the ':' is not in this batch
|| $i == 0 # this '?' is the first token of the line
|| $i == $max_index_to_go # or this '?' is the last token
)
{
# don't break if # this has a side comment, and
# don't break at a '?' if preceded by ':' on
# this line of previous ?/: pair on this line.
# This is an attempt to preserve a chain of ?/:
# expressions (elsif2.t).
if (
(
$i_last_colon < 0
|| $parent_seqno_to_go[$i_last_colon] !=
$parent_seqno_to_go[$i]
)
&& $tokens_to_go[$max_index_to_go] ne '#'
)
{
$self->set_forced_breakpoint($i);
}
$self->set_closing_breakpoint($i);
}
}
# must be one of { ( [
else {
# do requested -lp breaks at the OPENING token for BROKEN
# blocks. NOTE: this can be done for both -lp and -xlp,
# but only -xlp can really take advantage of this. So this
# is currently restricted to -xlp to avoid excess changes to
# existing -lp formatting.
if ( $rOpts_extended_line_up_parentheses
&& !defined( $mate_index_to_go[$i] ) )
{
my $lp_object =
$self->[_rlp_object_by_seqno_]->{$type_sequence};
if ($lp_object) {
my $K_begin_line = $lp_object->get_K_begin_line();
my $i_begin_line = $K_begin_line - $K_to_go[0];
$self->set_forced_lp_break( $i_begin_line, $i );
}
}
}
}
return;
} ## end sub break_lists_type_sequence
sub break_lists_increasing_depth {
my ( $self, $i ) = @_;
# Given:
# $i = index of this token in the _to_go output batch array
#--------------------------------------------
# prepare for a new list when depth increases
# token $i is a '(','{', or '['
#--------------------------------------------
#----------------------------------------------------------
# BEGIN initialize depth arrays
# ... use the same order as sub check_for_new_minimum_depth
#----------------------------------------------------------
$type_sequence_stack[$depth] = $type_sequence;
$override_cab3[$depth] = undef;
if ( $rOpts_comma_arrow_breakpoints == 3 && $type_sequence ) {
$override_cab3[$depth] =
$self->[_roverride_cab3_]->{$type_sequence};
}
$breakpoint_stack[$depth] = $forced_breakpoint_count;
$container_type[$depth] =
# k => && || ? : .
$is_container_label_type{$last_nonblank_type}
? $last_nonblank_token
: EMPTY_STRING;
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 0;
$item_count_stack[$depth] = 0;
$last_nonblank_type_stack[$depth] = $last_nonblank_type;
$opening_structure_index_stack[$depth] = $i;
$breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
$comma_index[$depth] = undef;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
$has_old_logical_breakpoints[$depth] = 0;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
# if line ends here then signal closing token to break
if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) {
$self->set_closing_breakpoint($i);
}
# Not all lists of values should be vertically aligned..
$dont_align[$depth] =
# code BLOCKS are handled at a higher level
$block_type
# certain paren lists
|| ( $type eq '(' ) && (
# it does not usually look good to align a list of
# identifiers in a parameter list, as in:
# my($var1, $var2, ...)
# (This test should probably be refined, for now I'm just
# testing for any keyword)
( $last_nonblank_type eq 'k' )
# a trailing '(' usually indicates a non-list
|| ( $next_nonblank_type eq '(' )
);
$has_broken_sublist[$depth] = 0;
$want_comma_break[$depth] = 0;
#----------------------------
# END initialize depth arrays
#----------------------------
# patch to outdent opening brace of long if/for/..
# statements (like this one). See similar coding in
# set_continuation breaks. We have also catch it here for
# short line fragments which otherwise will not go through
# break_long_lines.
if (
$block_type
# if we have the ')' but not its '(' in this batch..
&& ( $last_nonblank_token eq ')' )
&& !defined( $mate_index_to_go[$i_last_nonblank_token] )
# and user wants brace to left
&& !$rOpts_opening_brace_always_on_right
&& ( $type eq '{' ) # should be true
&& ( $token eq '{' ) # should be true
)
{
$self->set_forced_breakpoint( $i - 1 );
}
return;
} ## end sub break_lists_increasing_depth
sub break_lists_decreasing_depth {
my ( $self, $i, $rbond_strength_bias ) = @_;
# Given:
# $i = index of this token in the _to_go output batch array
# $rbond_strength_bias = list of bond strengths to be updated
# We have arrived at a closing container token in sub break_lists:
# the token at index $i is one of these: ')','}', ']'
# A number of important breakpoints for this container can now be set
# based on the information that we have collected. This includes:
# - breaks at commas to format tables
# - breaks at certain logical operators and other good breakpoints
# - breaks at opening and closing containers if needed by selected
# formatting styles
# These breaks are made by calling sub 'set_forced_breakpoint'
# Note that $rbond_strength_bias is passed to sub
# set_comma_breakpoints, but it will not be changed. It only gets
# changed by later calls for incomplete lists.
$self->check_for_new_minimum_depth( $depth, $parent_seqno_to_go[$i] )
if ( $depth < $minimum_depth );
# force all outer logical containers to break after we see on
# old breakpoint
$has_old_logical_breakpoints[$depth] ||=
$has_old_logical_breakpoints[$current_depth];
# Patch to break between ') {' if the paren list is broken.
# There is similar logic in break_long_lines for
# non-broken lists.
if ( $token eq ')'
&& $next_nonblank_block_type
&& $interrupted_list[$current_depth]
&& $next_nonblank_type eq '{'
&& !$rOpts_opening_brace_always_on_right )
{
$self->set_forced_breakpoint($i);
}
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
#-----------------------------------------------------------------
# Set breaks at commas to display a table of values if appropriate
#-----------------------------------------------------------------
my ( $bp_count, $do_not_break_apart ) = ( 0, 0 );
if ( $item_count_stack[$current_depth] ) {
( $bp_count, $do_not_break_apart ) =
$self->set_comma_breakpoints( $i, $current_depth,
$rbond_strength_bias );
}
#-----------------------------------------------------------
# Now set flags needed to decide if we should break open the
# container ... This is a long rambling section which has
# grown over time to handle all situations.
#-----------------------------------------------------------
my $i_opening = $opening_structure_index_stack[$current_depth];
my $saw_opening_structure = ( $i_opening >= 0 );
my $lp_object;
if ( $rOpts_line_up_parentheses && $saw_opening_structure ) {
$lp_object = $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$i_opening] };
}
# this term is long if we had to break at interior commas..
my $is_long_term = $bp_count > 0;
# If this is a short container with one or more comma arrows,
# then we will mark it as a long term to open it if requested.
# $rOpts_comma_arrow_breakpoints =
# 0 - open only if comma precedes closing brace
# 1 - stable: except for one line blocks
# 2 - try to form 1 line blocks
# 3 - ignore =>
# 4 - always open up if vt=0
# 5 - stable: even for one line blocks if vt=0
my $cab_flag = $rOpts_comma_arrow_breakpoints;
# replace -cab=3 if overriden
if ( $cab_flag == 3 && $type_sequence ) {
my $test_cab = $self->[_roverride_cab3_]->{$type_sequence};
if ( defined($test_cab) ) { $cab_flag = $test_cab }
}
# PATCH: Modify the -cab flag if we are not processing a list:
# We only want the -cab flag to apply to list containers, so
# for non-lists we use the default and stable -cab=5 value.
# Fixes case b939a.
if ( $type_sequence && !$self->[_ris_list_by_seqno_]->{$type_sequence} )
{
$cab_flag = 5;
}
# Ignore old breakpoints when under stress.
# Fixes b1203 b1204 as well as b1197-b1200.
# But not if -lp: fixes b1264, b1265. NOTE: rechecked with
# b1264 to see if this check is still required at all, and
# these still require a check, but at higher level beta+3
# instead of beta: b1193 b780
if ( $saw_opening_structure
&& !$lp_object
&& $levels_to_go[$i_opening] >= $high_stress_level )
{
$cab_flag = 2;
# Do not break hash braces under stress (fixes b1238)
$do_not_break_apart ||= $types_to_go[$i_opening] eq 'L';
# This option fixes b1235, b1237, b1240 with old and new
# -lp, but formatting is nicer with next option.
## $is_long_term ||=
## $levels_to_go[$i_opening] > $stress_level_beta + 1;
# This option fixes b1240 but not b1235, b1237 with new -lp,
# but this gives better formatting than the previous option.
# NOTE: Testing in v20240501 showed that this check is no longer
# needed for stability, but there is little point in removing it.
$do_not_break_apart ||=
$levels_to_go[$i_opening] > $stress_level_beta;
}
if ( !$is_long_term
&& $saw_opening_structure
&& $is_opening_token{ $tokens_to_go[$i_opening] }
&& $index_before_arrow[ $depth + 1 ] > 0
&& !$opening_vertical_tightness{ $tokens_to_go[$i_opening] } )
{
$is_long_term =
$cab_flag == 4
|| $cab_flag == 0 && $last_nonblank_type eq ','
|| $cab_flag == 5 && $old_breakpoint_to_go[$i_opening];
}
# mark term as long if the length between opening and closing
# parens exceeds allowed line length
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus = $self->find_token_starting_list($i_opening);
my $excess = $self->excess_line_length( $i_opening_minus, $i );
# Use standard spaces for indentation of lists in -lp mode
# if it gives a longer line length. This helps to avoid an
# instability due to forming and breaking one-line blocks.
# This fixes case b1314.
my $indentation = $leading_spaces_to_go[$i_opening_minus];
if ( ref($indentation)
&& $self->[_ris_broken_container_]->{$type_sequence} )
{
my $lp_spaces = $indentation->get_spaces();
my $std_spaces = $indentation->get_standard_spaces();
my $diff = $std_spaces - $lp_spaces;
if ( $diff > 0 ) { $excess += $diff }
}
my $tol = $length_tol;
# boost tol for an -lp container
if (
$lp_tol_boost
&& $lp_object
&& ( $rOpts_extended_continuation_indentation
|| !$self->[_ris_list_by_seqno_]->{$type_sequence} )
)
{
$tol += $lp_tol_boost;
}
# Patch to avoid blinking with -bbxi=2 and -cab=2
# in which variations in -ci cause unstable formatting
# in edge cases. We just always add one ci level so that
# the formatting is independent of the -BBX results.
# Fixes cases b1137 b1149 b1150 b1155 b1158 b1159 b1160
# b1161 b1166 b1167 b1168
if ( !$ci_levels_to_go[$i_opening]
&& $self->[_rbreak_before_container_by_seqno_]->{$type_sequence}
)
{
$tol += $rOpts_continuation_indentation;
}
$is_long_term = $excess + $tol > 0;
}
# We've set breaks after all comma-arrows. Now we have to
# undo them if this can be a one-line block
# (the only breakpoints set will be due to comma-arrows)
if (
# user doesn't require breaking after all comma-arrows
( $cab_flag != 0 ) && ( $cab_flag != 4 )
# and if the opening structure is in this batch
&& $saw_opening_structure
# and either on the same old line
&& (
$old_breakpoint_count_stack[$current_depth] ==
$last_old_breakpoint_count
# or user wants to form long blocks with arrows
# check on _rbreak_container_ added for b1500
|| ( $cab_flag == 2
&& !$self->[_rbreak_container_]->{$type_sequence} )
)
# and we made breakpoints between the opening and closing
&& ( $breakpoint_undo_stack[$current_depth] <
$forced_breakpoint_undo_count )
# and this block is short enough to fit on one line
# Note: use < because need 1 more space for possible comma
&& !$is_long_term
)
{
$self->undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
}
# now see if we have any comma breakpoints left
my $has_comma_breakpoints =
( $breakpoint_stack[$current_depth] != $forced_breakpoint_count );
# update broken-sublist flag of the outer container
$has_broken_sublist[$depth] =
$has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
# Having come to the closing ')', '}', or ']', now we have to decide
# if we should 'open up' the structure by placing breaks at the
# opening and closing containers. This is a tricky decision. Here
# are some of the basic considerations:
#
# -If this is a BLOCK container, then any breakpoints will have
# already been set (and according to user preferences), so we need do
# nothing here.
#
# -If we have a comma-separated list for which we can align the list
# items, then we need to do so because otherwise the vertical aligner
# cannot currently do the alignment.
#
# -If this container does itself contain a container which has been
# broken open, then it should be broken open to properly show the
# structure.
#
# -If there is nothing to align, and no other reason to break apart,
# then do not do it.
#
# We will not break open the parens of a long but 'simple' logical
# expression. For example:
#
# This is an example of a simple logical expression and its formatting:
#
# if ( $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4 )
#
# Most people would prefer this than the 'spacey' version:
#
# if (
# $bigwasteofspace1 && $bigwasteofspace2
# || $bigwasteofspace3 && $bigwasteofspace4
# )
#
# To illustrate the rules for breaking logical expressions, consider:
#
# FULLY DENSE:
# if ( $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc ))
#
# This is on the verge of being difficult to read. The current
# default is to open it up like this:
#
# DEFAULT:
# if (
# $opt_excl
# and ( exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc )
# )
#
# This is a compromise which tries to avoid being too dense and to
# spacey. A more spaced version would be:
#
# SPACEY:
# if (
# $opt_excl
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
# )
#
# Some people might prefer the spacey version -- an option could be
# added. The innermost expression contains a long block '( exists
# $ids_... ')'.
#
# Here is how the logic goes: We will force a break at the 'or' that
# the innermost expression contains, but we will not break apart its
# opening and closing containers because (1) it contains no
# multi-line sub-containers itself, and (2) there is no alignment to
# be gained by breaking it open like this
#
# and (
# exists $ids_excl_uc{$id_uc}
# or grep $id_uc =~ /$_/, @ids_excl_uc
# )
#
# (although this looks perfectly ok and might be good for long
# expressions). The outer 'if' container, though, contains a broken
# sub-container, so it will be broken open to avoid too much density.
# Also, since it contains no 'or's, there will be a forced break at
# its 'and'.
# Handle the experimental flag --break-open-compact-parens
# NOTE: This flag is not currently used and may eventually be removed.
# If this flag is set, we will implement it by
# pretending we did not see the opening structure, since in that case
# parens always get opened up.
if ( $saw_opening_structure
&& $rOpts_break_open_compact_parens )
{
# This parameter is a one-character flag, as follows:
# '0' matches no parens -> break open NOT OK
# '1' matches all parens -> break open OK
# Other values are same as used by the weld-exclusion-list
my $flag = $rOpts_break_open_compact_parens;
if ( $flag eq '*'
|| $flag eq '1' )
{
$saw_opening_structure = 0;
}
else {
# NOTE: $seqno will be equal to closure var $type_sequence here
my $seqno = $type_sequence_to_go[$i_opening];
$saw_opening_structure =
!$self->match_paren_control_flag( $seqno, $flag );
}
}
# Set some more flags telling something about this container..
my $is_simple_logical_expression;
if ( $item_count_stack[$current_depth] == 0
&& $saw_opening_structure
&& $tokens_to_go[$i_opening] eq '('
&& $is_logical_container{ $container_type[$current_depth] } )
{
# This seems to be a simple logical expression with
# no existing breakpoints. Set a flag to prevent
# opening it up.
if ( !$has_comma_breakpoints ) {
$is_simple_logical_expression = 1;
}
#---------------------------------------------------
# This seems to be a simple logical expression with
# breakpoints (broken sublists, for example). Break
# at all 'or's and '||'s.
#---------------------------------------------------
else {
$self->set_logical_breakpoints($current_depth);
}
}
# break long terms at any C-style for semicolons (c154)
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
{
$self->set_for_semicolon_breakpoints($current_depth);
# and open up a long 'for' or 'foreach' container to allow
# leading term alignment unless -lp is used.
$has_comma_breakpoints = 1 unless ($lp_object);
}
#----------------------------------------------------------------
# FINALLY: Break open container according to the flags which have
# been set.
#----------------------------------------------------------------
if (
# breaks for code BLOCKS are handled at a higher level
!$block_type
# we do not need to break at the top level of an 'if'
# type expression
&& !$is_simple_logical_expression
## modification to keep ': (' containers vertically tight;
## but probably better to let user set -vt=1 to avoid
## inconsistency with other paren types
## && ($container_type[$current_depth] ne ':')
# otherwise, we require one of these reasons for breaking:
&& (
# - this term has forced line breaks
$has_comma_breakpoints
# - the opening container is separated from this batch
# for some reason (comment, blank line, code block)
# - this is a non-paren container spanning multiple lines
|| !$saw_opening_structure
# - this is a long block contained in another breakable
# container
|| $is_long_term && !$self->is_in_block_by_i($i_opening)
)
)
{
# do special -lp breaks at the CLOSING token for INTACT
# blocks (because we might not do them if the block does
# not break open)
if ($lp_object) {
my $K_begin_line = $lp_object->get_K_begin_line();
my $i_begin_line = $K_begin_line - $K_to_go[0];
$self->set_forced_lp_break( $i_begin_line, $i_opening );
}
# break after opening structure.
# note: break before closing structure will be automatic
if ( $minimum_depth <= $current_depth ) {
if ( $i_opening >= 0 ) {
if ( !$do_not_break_apart
&& !is_unbreakable_container($current_depth) )
{
$self->set_forced_breakpoint($i_opening);
# Do not let brace types L/R use vertical tightness
# flags to recombine if we have to break on length
# because instability is possible if both vt and vtc
# flags are set ... see issue b1444.
if ( $is_long_term
&& $types_to_go[$i_opening] eq 'L'
&& $opening_vertical_tightness{'{'}
&& $closing_vertical_tightness{'}'} )
{
my $seqno = $type_sequence_to_go[$i_opening];
if ($seqno) {
$self->[_rbreak_container_]->{$seqno} = 1;
}
}
}
}
# break at ',' of lower depth level before opening token
if ( $last_comma_index[$depth] ) {
$self->set_forced_breakpoint( $last_comma_index[$depth] );
}
# break at '.' of lower depth level before opening token
if ( $last_dot_index[$depth] ) {
$self->set_forced_breakpoint( $last_dot_index[$depth] );
}
# break before opening structure if preceded by another
# closing structure and a comma. This is normally
# done by the previous closing brace, but not
# if it was a one-line block.
if ( $i_opening > 2 ) {
my $i_prev =
( $types_to_go[ $i_opening - 1 ] eq 'b' )
? $i_opening - 2
: $i_opening - 1;
my $type_prev = $types_to_go[$i_prev];
my $token_prev = $tokens_to_go[$i_prev];
if (
$type_prev eq ','
&& ( $types_to_go[ $i_prev - 1 ] eq ')'
|| $types_to_go[ $i_prev - 1 ] eq '}' )
)
{
$self->set_forced_breakpoint($i_prev);
}
# also break before something like ':(' or '?('
# if appropriate.
elsif ($type_prev =~ /^([k\:\?]|&&|\|\|)$/
&& $want_break_before{$token_prev} )
{
$self->set_forced_breakpoint($i_prev);
}
else {
## not a breakpoint
}
}
}
# break after comma following closing structure
if ( $types_to_go[ $i + 1 ] eq ',' ) {
$self->set_forced_breakpoint( $i + 1 );
}
# break before an '=' following closing structure
if (
$is_assignment{$next_nonblank_type}
&& ( $breakpoint_stack[$current_depth] !=
$forced_breakpoint_count )
)
{
$self->set_forced_breakpoint($i);
}
# break at any comma before the opening structure Added
# for -lp, but seems to be good in general. It isn't
# obvious how far back to look; the '5' below seems to
# work well and will catch the comma in something like
# push @list, myfunc( $param, $param, ..
my $icomma = $last_comma_index[$depth];
if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
if ( !$forced_breakpoint_to_go[$icomma] ) {
$self->set_forced_breakpoint($icomma);
}
}
}
#-----------------------------------------------------------
# Break open a logical container open if it was already open
#-----------------------------------------------------------
elsif ($is_simple_logical_expression
&& $has_old_logical_breakpoints[$current_depth] )
{
$self->set_logical_breakpoints($current_depth);
}
# Handle long container which does not get opened up
elsif ($is_long_term) {
# must set fake breakpoint to alert outer containers that
# they are complex
set_fake_breakpoint();
}
else {
## do not break open
}
return;
} ## end sub break_lists_decreasing_depth
} ## end closure break_lists
my %is_kwiZ;
my %is_key_type;
BEGIN {
# Added 'w' to fix b1172
my @q = qw( k w i Z -> );
@is_kwiZ{@q} = (1) x scalar(@q);
# added = for b1211
@q = qw< ( [ { L R } ] ) = b >;
push @q, ',';
@is_key_type{@q} = (1) x scalar(@q);
} ## end BEGIN
use constant DEBUG_FIND_START => 0;
sub find_token_starting_list {
# When testing to see if a block will fit on one line, some
# previous token(s) may also need to be on the line; particularly
# if this is a sub call. So we will look back at least one
# token.
my ( $self, $i_opening_paren ) = @_;
# Given:
# $i_opening_paren = index of the opening token in the _to_go arrays
# note: it could be any of { [ (
# This will be the return index
my $i_opening_minus = $i_opening_paren;
if ( $i_opening_minus <= 0 ) {
return $i_opening_minus;
}
my $im1 = $i_opening_paren - 1;
my ( $iprev_nb, $type_prev_nb ) = ( $im1, $types_to_go[$im1] );
if ( $type_prev_nb eq 'b' && $iprev_nb > 0 ) {
$iprev_nb -= 1;
$type_prev_nb = $types_to_go[$iprev_nb];
}
if ( $type_prev_nb eq ',' ) {
# a previous comma is a good break point
# $i_opening_minus = $i_opening_paren;
}
elsif (
$tokens_to_go[$i_opening_paren] eq '('
# non-parens added here to fix case b1186
|| $is_kwiZ{$type_prev_nb}
)
{
$i_opening_minus = $im1;
# Walk back to improve length estimate...
# FIX for cases b1169 b1170 b1171: start walking back
# at the previous nonblank. This makes the result insensitive
# to the flag --space-function-paren, and similar.
# previous loop: for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
foreach my $j ( reverse( 0 .. $iprev_nb ) ) {
if ( $is_key_type{ $types_to_go[$j] } ) {
# fix for b1211
if ( $types_to_go[$j] eq '=' ) { $i_opening_minus = $j }
last;
}
$i_opening_minus = $j;
}
if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
# fix for b1467
elsif ( $type_prev_nb eq '=' ) {
if ( $want_break_before{$type_prev_nb} ) {
$i_opening_minus = $iprev_nb;
}
}
else {
## previous token not special
}
DEBUG_FIND_START && print <<EOM;
FIND_START: i=$i_opening_paren tok=$tokens_to_go[$i_opening_paren] => im=$i_opening_minus tok=$tokens_to_go[$i_opening_minus]
EOM
return $i_opening_minus;
} ## end sub find_token_starting_list
{ ## begin closure table_maker
use constant DEBUG_SPARSE => 0;
sub table_maker {
my ( $self, $rhash_IN ) = @_;
# Given a list of comma-separated items, set breakpoints at some of
# the commas, if necessary, to make it easy to read.
# This is done by making calls to 'set_forced_breakpoint'.
# This is a complex routine because there are many special cases.
# Returns: nothing
# The numerous variables involved are contained three hashes:
# $rhash_IN : For contents see the calling routine
# $rhash_A: For contents see return from sub 'table_layout_A'
# $rhash_B: For contents see return from sub 'table_layout_B'
# Find lengths of all list items needed for calculating page layout
my $rhash_A = table_layout_A($rhash_IN);
return if ( !defined($rhash_A) );
# Some variables received from caller...
my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $has_broken_sublist = $rhash_IN->{has_broken_sublist};
my $interrupted = $rhash_IN->{interrupted};
#-----------------------------------------
# Section A: Handle some special cases ...
#-----------------------------------------
#-------------------------------------------------------------
# Special Case A1: Compound List Rule 1:
# Break at (almost) every comma for a list containing a broken
# sublist. This has higher priority than the Interrupted List
# Rule.
#-------------------------------------------------------------
if ($has_broken_sublist) {
$self->apply_broken_sublist_rule( $rhash_A, $interrupted );
return;
}
#--------------------------------------------------------------
# Special Case A2: Interrupted List Rule:
# A list is forced to use old breakpoints if it was interrupted
# by side comments or blank lines, or requested by user.
#--------------------------------------------------------------
if ( $rOpts_break_at_old_comma_breakpoints
|| $interrupted
|| $i_opening_paren < 0 )
{
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
$self->copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
return;
}
#-----------------------------------------------------------------
# Special Case A3: If it fits on one line, return and let the line
# break logic decide if and where to break.
#-----------------------------------------------------------------
# The -bbxi=2 parameters can add an extra hidden level of indentation
# so they need a tolerance to avoid instability. Fixes b1259, 1260.
my $opening_token = $tokens_to_go[$i_opening_paren];
my $tol = 0;
if ( $break_before_container_types{$opening_token}
&& $container_indentation_options{$opening_token}
&& $container_indentation_options{$opening_token} == 2 )
{
$tol = $rOpts_indent_columns;
# use greater of -ci and -i (fix for case b1334)
if ( $tol < $rOpts_continuation_indentation ) {
$tol = $rOpts_continuation_indentation;
}
}
# Increase tol when -atc and -dtc are both used to allow for
# possible loss in length on next pass due to a comma. Fixes b1455.
if (
$rOpts_delete_trailing_commas
&& $rOpts_add_trailing_commas
# optional additional restriction which works for b1455:
&& $rOpts_extended_continuation_indentation
&& $rOpts_continuation_indentation > $rOpts_indent_columns
)
{
$tol += 1;
}
# c410: check for $i_closing_paren > $max_index_to_go
my $i_opening_minus = $self->find_token_starting_list($i_opening_paren);
my $iend = min( $i_closing_paren, $max_index_to_go );
my $excess = $self->excess_line_length( $i_opening_minus, $iend );
return if ( $excess + $tol <= 0 );
#---------------------------------------
# Section B: Handle a multiline list ...
#---------------------------------------
$self->break_multiline_list( $rhash_IN, $rhash_A, $i_opening_minus );
return;
} ## end sub table_maker
sub apply_broken_sublist_rule {
my ( $self, $rhash_A, $interrupted ) = @_;
# Break at (almost) every comma for a list containing a broken
# sublist.
my $ritem_lengths = $rhash_A->{_ritem_lengths};
my $ri_term_begin = $rhash_A->{_ri_term_begin};
my $ri_term_end = $rhash_A->{_ri_term_end};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $item_count = $rhash_A->{_item_count_A};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
# Break at every comma except for a comma between two
# simple, small terms. This prevents long vertical
# columns of, say, just 0's.
my $small_length = 10; # 2 + actual maximum length wanted
# We'll insert a break in long runs of small terms to
# allow alignment in uniform tables.
my $skipped_count = 0;
my $columns = table_columns_available($i_first_comma);
my $fields = int( $columns / $small_length );
if ( $rOpts_maximum_fields_per_table
&& $fields > $rOpts_maximum_fields_per_table )
{
$fields = $rOpts_maximum_fields_per_table;
}
my $max_skipped_count = $fields - 1;
my $is_simple_last_term = 0;
my $is_simple_next_term = 0;
foreach my $j ( 0 .. $item_count ) {
$is_simple_last_term = $is_simple_next_term;
$is_simple_next_term = 0;
if ( $j < $item_count
&& $ri_term_end->[$j] == $ri_term_begin->[$j]
&& $ritem_lengths->[$j] <= $small_length )
{
$is_simple_next_term = 1;
}
next if $j == 0;
if ( $is_simple_last_term
&& $is_simple_next_term
&& $skipped_count < $max_skipped_count )
{
$skipped_count++;
}
else {
$skipped_count = 0;
my $i_tc = $ri_term_comma->[ $j - 1 ];
last unless defined($i_tc);
$self->set_forced_breakpoint($i_tc);
}
}
# always break at the last comma if this list is
# interrupted; we wouldn't want to leave a terminal '{', for
# example.
if ($interrupted) {
$self->set_forced_breakpoint($i_true_last_comma);
}
return;
} ## end sub apply_broken_sublist_rule
sub set_emergency_comma_breakpoints {
my (
$self, #
$number_of_fields_best,
$rhash_IN,
$comma_count,
$i_first_comma,
) = @_;
# The computed number of table fields is negative, so we have to make
# an emergency fix.
my $rcomma_index = $rhash_IN->{rcomma_index};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
my $must_break_open = $rhash_IN->{must_break_open};
# are we an item contained in an outer list?
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
# In many cases, it may be best to not force a break if there is just
# one comma, because the standard continuation break logic will do a
# better job without it.
# In the common case that all but one of the terms can fit
# on a single line, it may look better not to break open the
# containing parens. Consider, for example
# $color =
# join ( '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; }
# keys %colors );
# which will look like this with the container broken:
# $color = join (
# '/',
# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
# );
# Here is an example of this rule for a long last term:
# log_message( 0, 256, 128,
# "Number of routes in adj-RIB-in to be considered: $peercount" );
# And here is an example with a long first term:
# $s = sprintf(
# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
# $r, $pu, $ps, $cu, $cs, $tt
# )
# if $style eq 'all';
my $i_last_comma = $rcomma_index->[ $comma_count - 1 ];
my $long_last_term = $self->excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
$self->excess_line_length( $i_first_comma + 1, $max_index_to_go ) <=
0;
# break at every comma ...
if (
# if requested by user or is best looking
$number_of_fields_best == 1
# or if this is a sublist of a larger list
|| $in_hierarchical_list
# or if multiple commas and we don't have a long first or last
# term
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
)
{
foreach ( 0 .. $comma_count - 1 ) {
$self->set_forced_breakpoint( $rcomma_index->[$_] );
}
}
elsif ($long_last_term) {
$self->set_forced_breakpoint($i_last_comma);
${$rdo_not_break_apart} = 1 unless $must_break_open;
}
elsif ($long_first_term) {
$self->set_forced_breakpoint($i_first_comma);
}
else {
# let breaks be defined by default bond strength logic
}
return;
} ## end sub set_emergency_comma_breakpoints
sub break_multiline_list {
my ( $self, $rhash_IN, $rhash_A, $i_opening_minus ) = @_;
# We have a list spanning multiple lines and are trying
# to decide the best way to set comma breakpoints.
# Overridden variables
my $item_count = $rhash_A->{_item_count_A};
my $identifier_count = $rhash_A->{_identifier_count_A};
# Derived variables:
## my $ritem_lengths = $rhash_A->{_ritem_lengths};
## my $ri_term_begin = $rhash_A->{_ri_term_begin};
## my $ri_term_end = $rhash_A->{_ri_term_end};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $rmax_length = $rhash_A->{_rmax_length};
my $comma_count = $rhash_A->{_comma_count};
my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
my $first_term_length = $rhash_A->{_first_term_length};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $i_last_comma = $rhash_A->{_i_last_comma};
## my $i_true_last_comma = $rhash_A->{_i_true_last_comma};
# Variables received from caller
my $i_opening_paren = $rhash_IN->{i_opening_paren};
## my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $list_type = $rhash_IN->{list_type};
## my $interrupted = $rhash_IN->{interrupted};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
my $must_break_open = $rhash_IN->{must_break_open};
## NOTE: these input vars from caller use the values from rhash_A (see above):
## my $item_count = $rhash_IN->{item_count};
## my $identifier_count = $rhash_IN->{identifier_count};
# NOTE: i_opening_paren changes value below so we need to get these here
my $opening_is_in_block = $self->is_in_block_by_i($i_opening_paren);
my $opening_token = $tokens_to_go[$i_opening_paren];
#---------------------------------------------------------------
# Section B1: Determine '$number_of_fields' = the best number of
# fields to use if this is to be formatted as a table.
#---------------------------------------------------------------
# Now we know that this block spans multiple lines; we have to set
# at least one breakpoint -- real or fake -- as a signal to break
# open any outer containers.
set_fake_breakpoint();
# Set a flag indicating if we need to break open to keep -lp
# items aligned. This is necessary if any of the list terms
# exceeds the available space after the '('.
my $need_lp_break_open = $must_break_open;
my $is_lp_formatting = ref( $leading_spaces_to_go[$i_first_comma] );
if ( $is_lp_formatting && !$must_break_open ) {
my $columns_if_unbroken =
$maximum_line_length_at_level[ $levels_to_go[$i_opening_minus] ]
- total_line_length( $i_opening_minus, $i_opening_paren );
$need_lp_break_open =
( $rmax_length->[0] > $columns_if_unbroken )
|| ( $rmax_length->[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
my $hash_B =
$self->table_layout_B( $rhash_IN, $rhash_A, $is_lp_formatting );
return if ( !defined($hash_B) );
# Updated variables
$i_first_comma = $hash_B->{_i_first_comma_B};
$i_opening_paren = $hash_B->{_i_opening_paren_B};
$item_count = $hash_B->{_item_count_B};
# New variables
my $columns = $hash_B->{_columns};
my $formatted_columns = $hash_B->{_formatted_columns};
my $formatted_lines = $hash_B->{_formatted_lines};
my $max_width = $hash_B->{_max_width};
my $new_identifier_count = $hash_B->{_new_identifier_count};
my $number_of_fields = $hash_B->{_number_of_fields};
## my $odd_or_even = $hash_B->{_odd_or_even};
my $packed_columns = $hash_B->{_packed_columns};
my $packed_lines = $hash_B->{_packed_lines};
my $pair_width = $hash_B->{_pair_width};
my $ri_ragged_break_list = $hash_B->{_ri_ragged_break_list};
my $use_separate_first_term = $hash_B->{_use_separate_first_term};
# are we an item contained in an outer list?
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
my $unused_columns = $formatted_columns - $packed_columns;
# set some empirical parameters to help decide if we should try to
# align; high sparsity does not look good, especially with few lines
my $sparsity = ($unused_columns) / ($formatted_columns);
my $max_allowed_sparsity =
( $item_count < 3 ) ? 0.1
: ( $packed_lines == 1 ) ? 0.15
: ( $packed_lines == 2 ) ? 0.4
: 0.7;
my $two_line_word_wrap_ok;
if ( $opening_token eq '(' ) {
# default is to allow wrapping of short paren lists
$two_line_word_wrap_ok = 1;
# but turn off word wrap where requested
if ($rOpts_break_open_compact_parens) {
# This parameter is a one-character flag, as follows:
# '0' matches no parens -> break open NOT OK -> word wrap OK
# '1' matches all parens -> break open OK -> word wrap NOT OK
# Other values are the same as used by the weld-exclusion-list
my $flag = $rOpts_break_open_compact_parens;
if ( $flag eq '*'
|| $flag eq '1' )
{
$two_line_word_wrap_ok = 0;
}
elsif ( $flag eq '0' ) {
$two_line_word_wrap_ok = 1;
}
else {
my $seqno = $type_sequence_to_go[$i_opening_paren];
$two_line_word_wrap_ok =
!$self->match_paren_control_flag( $seqno, $flag );
}
}
}
#-------------------------------------------------------------------
# Section B2: Check for shortcut methods, which avoid treating
# a list as a table for relatively small parenthesized lists. These
# are usually easier to read if not formatted as tables.
#-------------------------------------------------------------------
if (
$packed_lines <= 2 # probably can fit in 2 lines
&& $item_count < 9 # doesn't have too many items
&& $opening_is_in_block # not a sub-container
&& $two_line_word_wrap_ok # ok to wrap this paren list
)
{
# Section B2A: Shortcut method 1: for -lp and just one comma:
# This is a no-brainer, just break at the comma.
if (
$is_lp_formatting # -lp
&& $item_count == 2 # two items, one comma
&& !$must_break_open
)
{
my $i_break = $rcomma_index->[0];
$self->set_forced_breakpoint($i_break);
${$rdo_not_break_apart} = 1;
return;
}
# Section B2B: Shortcut method 2 is for most small ragged lists
# which might look best if not displayed as a table.
if (
( $number_of_fields == 2 && $item_count == 3 )
|| (
$new_identifier_count > 0 # isn't all quotes
&& $sparsity > 0.15
) # would be fairly spaced gaps if aligned
)
{
my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
# NOTE: we should really use the true break count here,
# which can be greater if there are large terms and
# little space, but usually this will work well enough.
if ( !$must_break_open ) {
if ( $break_count <= 1
|| ( $is_lp_formatting && !$need_lp_break_open ) )
{
${$rdo_not_break_apart} = 1;
}
}
return;
}
} ## end shortcut methods
# debug stuff
DEBUG_SPARSE && do {
# How many spaces across the page will we fill?
my $columns_per_line =
int( $number_of_fields / 2 ) * $pair_width +
( $number_of_fields % 2 ) * $max_width;
print {*STDOUT}
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
#------------------------------------------------------------------
# Section B3: Compound List Rule 2:
# If this list is too long for one line, and it is an item of a
# larger list, then we must format it, regardless of sparsity
# (ian.t). One reason that we have to do this is to trigger
# Compound List Rule 1, above, which causes breaks at all commas of
# all outer lists. In this way, the structure will be properly
# displayed.
#------------------------------------------------------------------
# Decide if this list is too long for one line unless broken
my $total_columns = table_columns_available($i_opening_paren);
my $too_long = $packed_columns > $total_columns;
# For a paren list, include the length of the token just before the
# '(' because this is likely a sub call, and we would have to
# include the sub name on the same line as the list. This is still
# imprecise, but not too bad. (steve.t)
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
$too_long = $self->excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
# TODO: For an item after a '=>', try to include the length of the
# thing before the '=>'. This is crude and should be improved by
# actually looking back token by token.
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus_test = $i_opening_paren - 4;
if ( $i_opening_minus_test >= 0 ) {
$too_long = $self->excess_line_length( $i_opening_minus_test,
$i_effective_last_comma + 1 ) > 0;
}
}
# Always break lists contained in '[' and '{' if too long for 1 line,
# and always break lists which are too long and part of a more complex
# structure.
my $must_break_open_container = $must_break_open
|| ( $too_long
&& ( $in_hierarchical_list || !$two_line_word_wrap_ok ) );
#--------------------------------------------------------------------
# Section B4: A table will work here. But do not attempt to align
# columns if this is a tiny table or it would be too spaced. It
# seems that the more packed lines we have, the sparser the list that
# can be allowed and still look ok.
#--------------------------------------------------------------------
if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
|| ( $formatted_lines < 2 )
|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
)
{
#----------------------------------------------------------------
# Section B4A: too sparse: would not look good aligned in a table
#----------------------------------------------------------------
# use old breakpoints if this is a 'big' list
if ( $packed_lines > 2 && $item_count > 10 ) {
write_logfile_entry("List sparse: using old breakpoints\n");
$self->copy_old_breakpoints( $i_first_comma, $i_last_comma );
}
# let the continuation logic handle it if 2 lines
else {
my $break_count = $self->set_ragged_breakpoints( $ri_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
if ( !$must_break_open_container ) {
if ( $break_count <= 1
|| ( $is_lp_formatting && !$need_lp_break_open ) )
{
${$rdo_not_break_apart} = 1;
}
}
}
return;
}
#--------------------------------------------
# Section B4B: Go ahead and format as a table
#--------------------------------------------
$self->write_formatted_table( $number_of_fields, $comma_count,
$rcomma_index, $use_separate_first_term );
return;
} ## end sub break_multiline_list
sub table_layout_A {
my ($rhash_IN) = @_;
# Find lengths of all list items needed to calculate page layout
# Returns:
# - nothing if this list is empty, or
# - a ref to a hash containing some derived parameters
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $i_closing_paren = $rhash_IN->{i_closing_paren};
my $identifier_count = $rhash_IN->{identifier_count};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $item_count = $rhash_IN->{item_count};
# nothing to do if no commas seen
return if ( $item_count < 1 );
my $i_first_comma = $rcomma_index->[0];
my $i_true_last_comma = $rcomma_index->[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
$item_count -= 1;
return if ( $item_count < 1 );
$i_last_comma = $rcomma_index->[ $item_count - 1 ];
}
my $comma_count = $item_count;
my $ritem_lengths = [];
my $ri_term_begin = [];
my $ri_term_end = [];
my $ri_term_comma = [];
my $rmax_length = [ 0, 0 ];
my $i_prev_plus;
my $first_term_length;
my $i = $i_opening_paren;
my $is_odd = 1;
foreach my $j ( 0 .. $comma_count - 1 ) {
$is_odd = 1 - $is_odd;
$i_prev_plus = $i + 1;
$i = $rcomma_index->[$j];
my $i_term_end =
( $i == 0 || $types_to_go[ $i - 1 ] eq 'b' )
? $i - 2
: $i - 1;
my $i_term_begin =
( $types_to_go[$i_prev_plus] eq 'b' )
? $i_prev_plus + 1
: $i_prev_plus;
push @{$ri_term_begin}, $i_term_begin;
push @{$ri_term_end}, $i_term_end;
push @{$ri_term_comma}, $i;
# note: currently adding 2 to all lengths (for comma and space)
my $length =
2 + token_sequence_length( $i_term_begin, $i_term_end );
push @{$ritem_lengths}, $length;
if ( $j == 0 ) {
$first_term_length = $length;
}
else {
if ( $length > $rmax_length->[$is_odd] ) {
$rmax_length->[$is_odd] = $length;
}
}
}
# now we have to make a distinction between the comma count and item
# count, because the item count will be one greater than the comma
# count if the last item is not terminated with a comma
my $i_b =
( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
? $i_last_comma + 1
: $i_last_comma;
# NOTE: $i_closing_paren = $max_index_to_go+1 for a list which does
# not end in a closing paren. So the following test works (c410)
my $i_e =
( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
? $i_closing_paren - 2
: $i_closing_paren - 1;
my $i_effective_last_comma = $i_last_comma;
my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
if ( $last_item_length > 0 ) {
# add 2 to length because other lengths include a comma and a blank
$last_item_length += 2;
push @{$ritem_lengths}, $last_item_length;
push @{$ri_term_begin}, $i_b + 1;
push @{$ri_term_end}, $i_e;
push @{$ri_term_comma}, undef;
my $i_odd = $item_count % 2;
if ( $last_item_length > $rmax_length->[$i_odd] ) {
$rmax_length->[$i_odd] = $last_item_length;
}
$item_count++;
$i_effective_last_comma = $i_e + 1;
if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
$identifier_count++;
}
}
# be sure we do not extend beyond the current list length
if ( $i_effective_last_comma >= $max_index_to_go ) {
$i_effective_last_comma = $max_index_to_go - 1;
}
# Return the hash of derived variables.
return {
# Updated variables
_item_count_A => $item_count,
_identifier_count_A => $identifier_count,
# New variables
_ritem_lengths => $ritem_lengths,
_ri_term_begin => $ri_term_begin,
_ri_term_end => $ri_term_end,
_ri_term_comma => $ri_term_comma,
_rmax_length => $rmax_length,
_comma_count => $comma_count,
_i_effective_last_comma => $i_effective_last_comma,
_first_term_length => $first_term_length,
_i_first_comma => $i_first_comma,
_i_last_comma => $i_last_comma,
_i_true_last_comma => $i_true_last_comma,
};
} ## end sub table_layout_A
sub table_layout_B {
my ( $self, $rhash_IN, $rhash_A, $is_lp_formatting ) = @_;
# Determine variables for the best table layout, including
# the best number of fields.
# Returns:
# - nothing if nothing more to do
# - a ref to a hash containing some derived parameters
# Variables from caller
my $i_opening_paren = $rhash_IN->{i_opening_paren};
my $list_type = $rhash_IN->{list_type};
my $next_nonblank_type = $rhash_IN->{next_nonblank_type};
my $rcomma_index = $rhash_IN->{rcomma_index};
my $rdo_not_break_apart = $rhash_IN->{rdo_not_break_apart};
# Table size variables
my $comma_count = $rhash_A->{_comma_count};
my $first_term_length = $rhash_A->{_first_term_length};
my $i_effective_last_comma = $rhash_A->{_i_effective_last_comma};
my $i_first_comma = $rhash_A->{_i_first_comma};
my $identifier_count = $rhash_A->{_identifier_count_A};
my $item_count = $rhash_A->{_item_count_A};
my $ri_term_begin = $rhash_A->{_ri_term_begin};
my $ri_term_comma = $rhash_A->{_ri_term_comma};
my $ri_term_end = $rhash_A->{_ri_term_end};
my $ritem_lengths = $rhash_A->{_ritem_lengths};
my $rmax_length = $rhash_A->{_rmax_length};
# Specify if the list must have an even number of fields or not.
# It is generally safest to assume an even number, because the
# list items might be a hash list. But if we can be sure that
# it is not a hash, then we can allow an odd number for more
# flexibility.
# 1 = odd field count ok, 2 = want even count
my $odd_or_even = 2;
if (
$identifier_count >= $item_count - 1
|| $is_assignment{$next_nonblank_type}
|| ( $list_type
&& $list_type ne '=>'
&& $list_type !~ /^[\:\?]$/ )
)
{
$odd_or_even = 1;
}
# do we have a long first term which should be
# left on a line by itself?
my $use_separate_first_term = (
$odd_or_even == 1 # only if we can use 1 field/line
&& $item_count > 3 # need several items
&& $first_term_length >
2 * $rmax_length->[0] - 2 # need long first term
&& $first_term_length >
2 * $rmax_length->[1] - 2 # need long first term
);
# or do we know from the type of list that the first term should
# be placed alone?
if ( !$use_separate_first_term ) {
if ( $is_keyword_with_special_leading_term{$list_type} ) {
$use_separate_first_term = 1;
# should the container be broken open?
if ( $item_count < 3 ) {
if ( $i_first_comma - $i_opening_paren < 4 ) {
${$rdo_not_break_apart} = 1;
}
}
elsif ($first_term_length < 20
&& $i_first_comma - $i_opening_paren < 4 )
{
my $columns = table_columns_available($i_first_comma);
if ( $first_term_length < $columns ) {
${$rdo_not_break_apart} = 1;
}
}
else {
# break
}
}
}
# if so,
if ($use_separate_first_term) {
# ..set a break and update starting values
$self->set_forced_breakpoint($i_first_comma);
$item_count--;
#---------------------------------------------------------------
# Section B1A: Stop if one item remains ($i_first_comma = undef)
#---------------------------------------------------------------
# Fix for b1442: use '$item_count' here instead of '$comma_count'
# to make the result independent of any trailing comma.
return if ( $item_count <= 1 );
$i_opening_paren = $i_first_comma;
$i_first_comma = $rcomma_index->[1];
shift @{$ritem_lengths};
shift @{$ri_term_begin};
shift @{$ri_term_end};
shift @{$ri_term_comma};
}
# if not, update the metrics to include the first term
else {
if ( $first_term_length > $rmax_length->[0] ) {
$rmax_length->[0] = $first_term_length;
}
}
# Field width parameters
my $pair_width = ( $rmax_length->[0] + $rmax_length->[1] );
my $max_width =
( $rmax_length->[0] > $rmax_length->[1] )
? $rmax_length->[0]
: $rmax_length->[1];
# Number of free columns across the page width for laying out tables
my $columns = table_columns_available($i_first_comma);
# Patch for b1210 and b1216-b1218 when -vmll is set. If we are unable
# to break after an opening paren, then the maximum line length for the
# first line could be less than the later lines. So we need to reduce
# the line length. Normally, we will get a break after an opening
# paren, but in some cases we might not.
if ( $rOpts_variable_maximum_line_length
&& $tokens_to_go[$i_opening_paren] eq '('
&& @{$ri_term_begin} )
{
my $ib = $ri_term_begin->[0];
my $type = $types_to_go[$ib];
# So far, the only known instance of this problem is when
# a bareword follows an opening paren with -vmll
if ( $type eq 'w' ) {
# If a line starts with paren+space+terms, then its max length
# could be up to ci+2-i spaces less than if the term went out
# on a line after the paren. So..
my $tol_w = max( 0,
2 + $rOpts_continuation_indentation -
$rOpts_indent_columns );
$columns = max( 0, $columns - $tol_w );
## Here is the original b1210 fix, but it failed on b1216-b1218
##my $columns2 = table_columns_available($i_opening_paren);
##$columns = min( $columns, $columns2 );
}
}
# Estimated maximum number of fields which fit this space.
# This will be our first guess:
my $number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
$pair_width );
my $number_of_fields = $number_of_fields_max;
# Find the best-looking number of fields.
# This will be our second guess, if possible.
my ( $number_of_fields_best, $ri_ragged_break_list,
$new_identifier_count )
= $self->study_list_complexity( $ri_term_begin, $ri_term_end,
$ritem_lengths, $max_width );
if ( $number_of_fields_best != 0
&& $number_of_fields_best < $number_of_fields_max )
{
$number_of_fields = $number_of_fields_best;
}
# fix b1427
elsif ($number_of_fields_best > 1
&& $number_of_fields_best > $number_of_fields_max )
{
$number_of_fields_best = $number_of_fields_max;
}
else {
# no change
}
# If we are crowded and the -lp option is being used, try
# to undo some indentation
if (
$is_lp_formatting
&& (
$number_of_fields == 0
|| ( $number_of_fields == 1
&& $number_of_fields != $number_of_fields_best )
)
)
{
( $number_of_fields, $number_of_fields_best, $columns ) =
$self->lp_table_fix(
{
columns => $columns,
i_first_comma => $i_first_comma,
max_width => $max_width,
number_of_fields => $number_of_fields,
number_of_fields_best => $number_of_fields_best,
odd_or_even => $odd_or_even,
pair_width => $pair_width,
ritem_lengths => $ritem_lengths,
}
);
}
# try for one column if two won't work
if ( $number_of_fields <= 0 ) {
$number_of_fields = int( $columns / $max_width );
}
# The user can place an upper bound on the number of fields,
# which can be useful for doing maintenance on tables
if ( $rOpts_maximum_fields_per_table
&& $number_of_fields > $rOpts_maximum_fields_per_table )
{
$number_of_fields = $rOpts_maximum_fields_per_table;
}
# How many columns (characters) and lines would this container take
# if no additional whitespace were added?
my $packed_columns = token_sequence_length( $i_opening_paren + 1,
$i_effective_last_comma + 1 );
if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
my $packed_lines = 1 + int( $packed_columns / $columns );
#-----------------------------------------------------------------
# Section B1B: Stop here if we did not compute a positive number of
# fields. In this case we just have to bail out.
#-----------------------------------------------------------------
if ( $number_of_fields <= 0 ) {
$self->set_emergency_comma_breakpoints(
$number_of_fields_best,
$rhash_IN,
$comma_count,
$i_first_comma,
);
return;
}
#------------------------------------------------------------------
# Section B1B: We have a tentative field count that seems to work.
# Now we must look more closely to determine if a table layout will
# actually look okay.
#------------------------------------------------------------------
# How many lines will this require?
my $formatted_lines = $item_count / $number_of_fields;
if ( $formatted_lines != int($formatted_lines) ) {
$formatted_lines = 1 + int($formatted_lines);
}
# So far we've been trying to fill out to the right margin. But
# compact tables are easier to read, so let's see if we can use fewer
# fields without increasing the number of lines.
$number_of_fields = compactify_table( $item_count, $number_of_fields,
$formatted_lines, $odd_or_even );
my $formatted_columns;
if ( $number_of_fields > 1 ) {
$formatted_columns =
( $pair_width * ( int( $item_count / 2 ) ) +
( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
}
if ( $formatted_columns < $packed_columns ) {
$formatted_columns = $packed_columns;
}
# Construce hash_B:
return {
# Updated variables
_i_first_comma_B => $i_first_comma,
_i_opening_paren_B => $i_opening_paren,
_item_count_B => $item_count,
# New variables
_columns => $columns,
_formatted_columns => $formatted_columns,
_formatted_lines => $formatted_lines,
_max_width => $max_width,
_new_identifier_count => $new_identifier_count,
_number_of_fields => $number_of_fields,
## _odd_or_even => $odd_or_even,
_packed_columns => $packed_columns,
_packed_lines => $packed_lines,
_pair_width => $pair_width,
_ri_ragged_break_list => $ri_ragged_break_list,
_use_separate_first_term => $use_separate_first_term,
};
} ## end sub table_layout_B
sub lp_table_fix {
# try to undo some -lp indentation to improve table formatting
my ( $self, $rcall_hash ) = @_;
my $columns = $rcall_hash->{columns};
my $i_first_comma = $rcall_hash->{i_first_comma};
my $max_width = $rcall_hash->{max_width};
my $number_of_fields = $rcall_hash->{number_of_fields};
my $number_of_fields_best = $rcall_hash->{number_of_fields_best};
my $odd_or_even = $rcall_hash->{odd_or_even};
my $pair_width = $rcall_hash->{pair_width};
my $ritem_lengths = $rcall_hash->{ritem_lengths};
my $available_spaces =
$self->get_available_spaces_to_go($i_first_comma);
if ( $available_spaces > 0 ) {
my $spaces_wanted = $max_width - $columns; # for 1 field
if ( $number_of_fields_best == 0 ) {
$number_of_fields_best =
get_maximum_fields_wanted($ritem_lengths);
}
if ( $number_of_fields_best != 1 ) {
my $spaces_wanted_2 = 1 + $pair_width - $columns; # for 2 fields
if ( $available_spaces > $spaces_wanted_2 ) {
$spaces_wanted = $spaces_wanted_2;
}
}
if ( $spaces_wanted > 0 ) {
my $deleted_spaces =
$self->reduce_lp_indentation( $i_first_comma,
$spaces_wanted );
# redo the math
if ( $deleted_spaces > 0 ) {
$columns = table_columns_available($i_first_comma);
$number_of_fields =
maximum_number_of_fields( $columns, $odd_or_even,
$max_width, $pair_width );
if ( $number_of_fields_best == 1
&& $number_of_fields >= 1 )
{
$number_of_fields = $number_of_fields_best;
}
}
}
}
return ( $number_of_fields, $number_of_fields_best, $columns );
} ## end sub lp_table_fix
sub write_formatted_table {
# Write a table of comma separated items with fixed number of fields
my ( $self, $number_of_fields, $comma_count, $rcomma_index,
$use_separate_first_term )
= @_;
write_logfile_entry(
"List: auto formatting with $number_of_fields fields/row\n");
if ( $number_of_fields < 1 ) {
## shouldn't happen - caller passed bad parameter
DEVEL_MODE && Fault("bad number of fields=$number_of_fields\n");
return;
}
my $j_first_break =
$use_separate_first_term
? $number_of_fields
: $number_of_fields - 1;
my $j = $j_first_break;
while ( $j < $comma_count ) {
my $i_comma = $rcomma_index->[$j];
$self->set_forced_breakpoint($i_comma);
$j += $number_of_fields;
}
return;
} ## end sub write_formatted_table
} ## end closure set_comma_breakpoint_final
sub study_list_complexity {
my ( $self, $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
# Look for complex tables which should be formatted with one term per line.
# Returns the following:
#
# \@i_ragged_break_list = list of good breakpoints to avoid lines
# which are hard to read
# $number_of_fields_best = suggested number of fields based on
# complexity; = 0 if any number may be used.
#
my $item_count = @{$ri_term_begin};
my $complex_item_count = 0;
my $number_of_fields_best = $rOpts_maximum_fields_per_table;
my $i_max = @{$ritem_lengths} - 1;
my $i_last_last_break = -3;
my $i_last_break = -2;
my @i_ragged_break_list;
my $definitely_complex = 30;
my $definitely_simple = 12;
my $quote_count = 0;
for my $i ( 0 .. $i_max ) {
my $ib = $ri_term_begin->[$i];
my $ie = $ri_term_end->[$i];
# define complexity: start with the actual term length
my $weighted_length = ( $ritem_lengths->[$i] - 2 );
##TBD: join types here and check for variations
##my $str=join "", @tokens_to_go[$ib..$ie];
my $is_quote = 0;
if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
$is_quote = 1;
$quote_count++;
}
elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
$quote_count++;
}
else {
# not a quote
}
if ( $ib eq $ie ) {
if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
$complex_item_count++;
$weighted_length *= 2;
}
else {
}
}
else {
if ( first { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
$complex_item_count++;
$weighted_length *= 2;
}
if ( first { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
$weighted_length += 4;
}
}
# add weight for extra tokens.
$weighted_length += 2 * ( $ie - $ib );
# mark a ragged break after this item it if it is 'long and complex':
if ( $weighted_length >= $definitely_complex ) {
# if we broke after the previous term
# then break before it too
if ( $i_last_break == $i - 1
&& $i > 1
&& $i_last_last_break != $i - 2 )
{
## TODO: don't strand a small term
pop @i_ragged_break_list;
push @i_ragged_break_list, $i - 2;
push @i_ragged_break_list, $i - 1;
}
push @i_ragged_break_list, $i;
$i_last_last_break = $i_last_break;
$i_last_break = $i;
}
# don't break before a small last term -- it will
# not look good on a line by itself.
elsif ($i == $i_max
&& $i_last_break == $i - 1
&& $weighted_length <= $definitely_simple )
{
pop @i_ragged_break_list;
}
else {
# ok as is
}
}
my $identifier_count = $i_max + 1 - $quote_count;
# Need more tuning here..
if ( $max_width > 12
&& $complex_item_count > $item_count / 2
&& $number_of_fields_best != 2 )
{
$number_of_fields_best = 1;
}
return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
} ## end sub study_list_complexity
sub get_maximum_fields_wanted {
my ($ritem_lengths) = @_;
# Not all tables look good with more than one field of items.
# This routine looks at a table and decides if it should be
# formatted with just one field or not.
# This coding is still under development.
my $number_of_fields_best = 0;
# For just a few items, we tentatively assume just 1 field.
my $item_count = @{$ritem_lengths};
if ( $item_count <= 5 ) {
$number_of_fields_best = 1;
}
# For larger tables, look at it both ways and see what looks best
else {
my $is_odd = 1;
my @max_length = ( 0, 0 );
my @last_length_2 = ( undef, undef );
my @first_length_2 = ( undef, undef );
my $last_length = undef;
my $total_variation_1 = 0;
my $total_variation_2 = 0;
my @total_variation_2_sums = ( 0, 0 );
foreach my $j ( 0 .. $item_count - 1 ) {
$is_odd = 1 - $is_odd;
my $length = $ritem_lengths->[$j];
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
if ( defined($last_length) ) {
my $dl = abs( $length - $last_length );
$total_variation_1 += $dl;
}
$last_length = $length;
my $ll = $last_length_2[$is_odd];
if ( defined($ll) ) {
my $dl = abs( $length - $ll );
$total_variation_2_sums[$is_odd] += $dl;
}
else {
$first_length_2[$is_odd] = $length;
}
$last_length_2[$is_odd] = $length;
}
$total_variation_2 =
$total_variation_2_sums[0] + $total_variation_2_sums[1];
my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
if ( $total_variation_2 >= $factor * $total_variation_1 ) {
$number_of_fields_best = 1;
}
}
return ($number_of_fields_best);
} ## end sub get_maximum_fields_wanted
sub table_columns_available {
my $i_first_comma = shift;
my $columns =
$maximum_line_length_at_level[ $levels_to_go[$i_first_comma] ] -
leading_spaces_to_go($i_first_comma);
# Patch: the vertical formatter does not line up lines whose lengths
# exactly equal the available line length because of allowances
# that must be made for side comments. Therefore, the number of
# available columns is reduced by 1 character.
$columns -= 1;
return $columns;
} ## end sub table_columns_available
sub maximum_number_of_fields {
# how many fields will fit in the available space?
my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
my $max_pairs = int( $columns / $pair_width );
my $number_of_fields = $max_pairs * 2;
if ( $odd_or_even == 1
&& $max_pairs * $pair_width + $max_width <= $columns )
{
$number_of_fields++;
}
return $number_of_fields;
} ## end sub maximum_number_of_fields
sub compactify_table {
my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
# For a table with a certain number of fields and a certain number
# of lines, see if reducing the number of fields will make it look
# better.
# Given:
# $item_count = count of list items
# $number_of_fields = current number of items per line
# $formatted_lines = number of lines this will require
# $odd_or_even = 1=>odd field count is ok, 2=>want even count
# Return:
# $number_of_fields = updated number of items per line
if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
my $min_fields = $number_of_fields;
if ( $odd_or_even < 1 ) {
## shouldn't happen - caller passed bad parameter
DEVEL_MODE && Fault("bad value for odd_or_even=$odd_or_even\n");
return $number_of_fields;
}
while ($min_fields >= $odd_or_even
&& $min_fields * $formatted_lines >= $item_count )
{
$number_of_fields = $min_fields;
$min_fields -= $odd_or_even;
} ## end while ( $min_fields >= $odd_or_even...)
}
return $number_of_fields;
} ## end sub compactify_table
sub set_ragged_breakpoints {
my ( $self, $ri_term_comma, $ri_ragged_break_list ) = @_;
# Set breakpoints in a list that cannot be formatted nicely as a table.
my $break_count = 0;
foreach ( @{$ri_ragged_break_list} ) {
my $j = $ri_term_comma->[$_];
if ($j) {
$self->set_forced_breakpoint($j);
$break_count++;
}
}
return $break_count;
} ## end sub set_ragged_breakpoints
sub copy_old_breakpoints {
my ( $self, $i_first_comma, $i_last_comma ) = @_;
# We are formatting a list and have decided to make comma breaks
# the same as in the input file.
# If the comma style is under certain controls, and if this is a
# comma breakpoint with the comma at the beginning of the next
# line, then we must pass that index instead. This will allow sub
# set_forced_breakpoints to check and follow the user settings. This
# produces a uniform style and can prevent instability (b1422).
#
# The flag '$controlled_comma_style' will be set if the user
# entered any of -wbb=',' -wba=',' -kbb=',' -kba=','. It is not
# set for the -boc flag, but we will need to treat -boc in the
# same way for lists with breaks both before and after commas to
# avoid excess iterations.
my @i_old_breaks;
for my $i ( $i_first_comma .. $i_last_comma ) {
if ( $old_breakpoint_to_go[$i] ) {
push @i_old_breaks, $i;
}
}
# just copy old breakpoints unless $controlled_comma_style or -boc
if ( !$controlled_comma_style
&& !$rOpts_break_at_old_comma_breakpoints )
{
foreach my $ii (@i_old_breaks) {
$self->set_forced_breakpoint($ii);
}
return;
}
# Scan for commas before and after the old breakpoints...
my @i_breaks;
my $num_after;
my $num_before;
foreach my $i (@i_old_breaks) {
my $i_break = $i;
if ( $types_to_go[$i_break] ne ',' ) {
my $index = $inext_to_go[$i_break];
if ( $index > $i_break && $types_to_go[$index] eq ',' ) {
$i_break = $index;
$num_before++;
}
}
else { $num_after++; }
push @i_breaks, $i_break;
}
# -boc by itself can use old breaks except when there are mixed
# leading and trailing commas. In that case excess iterations
# can occur (see b878)
if ( !$controlled_comma_style
&& $rOpts_break_at_old_comma_breakpoints )
{
my $mixed = $num_before && $num_after;
if ( !$mixed ) {
@i_breaks = @i_old_breaks;
}
}
foreach my $ii (@i_breaks) {
$self->set_forced_breakpoint($ii);
}
return;
} ## end sub copy_old_breakpoints
sub set_nobreaks {
my ( $self, $i, $j ) = @_;
# Given:
# $i = starting index in _to_go arrays
# $j = ending index in _to_go arrays
# Task:
# set nobreak_to_go for index range $i .. $j
if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
0 && do {
my ( $pkg, $file_uu, $lno ) = caller();
print {*STDOUT}
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $pkg $lno with i=$i max=$max_index_to_go type=$types_to_go[$i]\n";
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
}
# shouldn't happen; non-critical error
else {
if (DEVEL_MODE) {
my ( $pkg, $file_uu, $lno ) = caller();
Fault(<<EOM);
NOBREAK ERROR: from $pkg $lno with i=$i j=$j max=$max_index_to_go
EOM
}
}
return;
} ## end sub set_nobreaks
###############################################
# CODE SECTION 12: Code for setting indentation
###############################################
sub token_sequence_length {
# return length of tokens ($ibeg .. $iend) including $ibeg & $iend
my ( $ibeg, $iend ) = @_;
# fix possible negative starting index
if ( $ibeg < 0 ) { $ibeg = 0 }
# returns 0 if index range is empty (some subs assume this)
if ( $ibeg > $iend ) {
return 0;
}
return $summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$ibeg];
} ## end sub token_sequence_length
sub total_line_length {
# return length of a line of tokens ($ibeg .. $iend)
my ( $ibeg, $iend ) = @_;
# get the leading spaces on this line ...
my $spaces = $leading_spaces_to_go[$ibeg];
if ( ref($spaces) ) { $spaces = $spaces->get_spaces() }
# ... then add the net token length
return $spaces + $summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg];
} ## end sub total_line_length
sub excess_line_length {
my ( $self, $ibeg, $iend, ($ignore_right_weld) ) = @_;
# Return number of characters by which a line of tokens ($ibeg..$iend)
# exceeds the allowable line length.
# Given:
# $ibeg, $iend = range of indexes of this line in the _to_go arrays
# $ignore_right_weld = optional flag = true to exclude any right weld
# NOTE: profiling shows that efficiency of this routine is essential.
# Start with the leading spaces on this line ...
my $excess = $leading_spaces_to_go[$ibeg];
if ( ref($excess) ) { $excess = $excess->get_spaces() }
# ... and include right weld lengths unless requested not to
if ( $total_weld_count
&& $type_sequence_to_go[$iend]
&& !$ignore_right_weld )
{
my $wr = $self->[_rweld_len_right_at_K_]->{ $K_to_go[$iend] };
$excess += $wr if defined($wr);
}
# ... then add the net token length, minus the maximum length
return $excess +
$summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg] -
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
} ## end sub excess_line_length
sub get_spaces {
# return the number of leading spaces associated with an indentation
# variable $indentation is either a constant number of spaces or an object
# with a get_spaces method.
my $indentation = shift;
return ref($indentation) ? $indentation->get_spaces() : $indentation;
} ## end sub get_spaces
sub get_recoverable_spaces {
# return the number of spaces (+ means shift right, - means shift left)
# that we would like to shift a group of lines with the same indentation
# to get them to line up with their opening parens
my $indentation = shift;
return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
} ## end sub get_recoverable_spaces
sub get_available_spaces_to_go {
my ( $self, $ii ) = @_;
my $item = $leading_spaces_to_go[$ii];
# return the number of available leading spaces associated with an
# indentation variable. $indentation is either a constant number of
# spaces or an object with a get_available_spaces method.
return ref($item) ? $item->get_available_spaces() : 0;
} ## end sub get_available_spaces_to_go
{ ## begin closure set_lp_indentation
use constant DEBUG_LP => 0;
# Stack of -lp index objects which survives between batches.
my $rLP;
my $max_lp_stack;
# The predicted position of the next opening container which may start
# an -lp indentation level. This survives between batches.
my $lp_position_predictor;
BEGIN {
# Index names for the -lp stack variables.
# Do not combine with other BEGIN blocks (c101).
my $i = 0;
use constant {
_lp_ci_level_ => $i++,
_lp_level_ => $i++,
_lp_object_ => $i++,
_lp_container_seqno_ => $i++,
_lp_space_count_ => $i++,
};
} ## end BEGIN
sub initialize_lp_vars {
# initialize gnu variables for a new file;
# must be called once at the start of a new file.
$lp_position_predictor = 0;
$max_lp_stack = 0;
# we can turn off -lp if all levels will be at or above the cutoff
if ( $high_stress_level <= 1 ) {
$rOpts_line_up_parentheses = 0;
$rOpts_extended_line_up_parentheses = 0;
}
# fix for b1459: -naws adds stress for -xlp
if ( $high_stress_level <= 2 && !$rOpts_add_whitespace ) {
$rOpts_extended_line_up_parentheses = 0;
}
# fix for b1465: -vmll adds stress for -xlp
if ( $high_stress_level <= 2 && $rOpts_variable_maximum_line_length ) {
$rOpts_extended_line_up_parentheses = 0;
}
$rLP = [];
# initialize the leading whitespace stack to negative levels
# so that we can never run off the end of the stack
$rLP->[$max_lp_stack]->[_lp_ci_level_] = -1;
$rLP->[$max_lp_stack]->[_lp_level_] = -1;
$rLP->[$max_lp_stack]->[_lp_object_] = undef;
$rLP->[$max_lp_stack]->[_lp_container_seqno_] = SEQ_ROOT;
$rLP->[$max_lp_stack]->[_lp_space_count_] = 0;
return;
} ## end sub initialize_lp_vars
# hashes for efficient testing
my %hash_test1;
my %hash_test2;
my %hash_test3;
BEGIN {
my @q = qw< } ) ] >;
@hash_test1{@q} = (1) x scalar(@q);
@q = qw( : ? f );
push @q, ',';
@hash_test2{@q} = (1) x scalar(@q);
@q = qw( . || && );
@hash_test3{@q} = (1) x scalar(@q);
} ## end BEGIN
# shared variables, re-initialized for each batch
my $rlp_object_list;
my $max_lp_object_list;
my %lp_comma_count;
my %lp_arrow_count;
my $space_count;
my $current_level;
my $current_ci_level;
my $ii_begin_line;
my $in_lp_mode;
my $stack_changed;
my $K_last_nonblank;
my $last_nonblank_token;
my $last_nonblank_type;
my $last_last_nonblank_type;
sub set_lp_indentation {
my ($self) = @_;
# Define the leading whitespace for all tokens in the current batch
# when the -lp formatting is selected.
# Returns number of tokens in this batch which have leading spaces
# defined by an lp object:
my $lp_object_count_this_batch = 0;
# Safety check: this should not be called when there is nothing to do
if ( !$rOpts_line_up_parentheses
|| !defined($max_index_to_go)
|| $max_index_to_go < 0 )
{
my $lp_str =
defined($rOpts_line_up_parentheses)
? $rOpts_line_up_parentheses
: 'undef';
my $max_str =
defined($max_index_to_go) ? $max_index_to_go : 'undef';
DEVEL_MODE
&& Fault(
"should not be here with -lp=$lp_str -max_index_to_go=$max_str\n"
);
return $lp_object_count_this_batch;
}
# List of -lp indentation objects created in this batch
$rlp_object_list = [];
$max_lp_object_list = -1;
%lp_comma_count = ();
%lp_arrow_count = ();
$space_count = undef;
$current_level = undef;
$current_ci_level = undef;
$ii_begin_line = 0;
$in_lp_mode = 0;
$stack_changed = 1;
$K_last_nonblank = undef;
$last_nonblank_token = EMPTY_STRING;
$last_nonblank_type = EMPTY_STRING;
$last_last_nonblank_type = EMPTY_STRING;
my %last_lp_equals = ();
my $rLL = $self->[_rLL_];
my $this_batch = $self->[_this_batch_];
my $starting_in_quote = $this_batch->[_starting_in_quote_];
my $imin = 0;
# The 'starting_in_quote' flag means that the first token is the first
# token of a line and it is also the continuation of some kind of
# multi-line quote or pattern. It must have no added leading
# whitespace, so we can skip it.
if ($starting_in_quote) {
$imin += 1;
}
my $Kpnb = $K_to_go[0] - 1;
if ( $Kpnb > 0 && $rLL->[$Kpnb]->[_TYPE_] eq 'b' ) {
$Kpnb -= 1;
}
if ( $Kpnb >= 0 && $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) {
$K_last_nonblank = $Kpnb;
}
if ( defined($K_last_nonblank) ) {
$last_nonblank_token = $rLL->[$K_last_nonblank]->[_TOKEN_];
$last_nonblank_type = $rLL->[$K_last_nonblank]->[_TYPE_];
}
#-----------------------------------
# Loop over all tokens in this batch
#-----------------------------------
foreach my $ii ( $imin .. $max_index_to_go ) {
my $type = $types_to_go[$ii];
my $token = $tokens_to_go[$ii];
my $level = $levels_to_go[$ii];
my $ci_level = $ci_levels_to_go[$ii];
my $total_depth = $nesting_depth_to_go[$ii];
# get the top state from the stack if it has changed
if ($stack_changed) {
my $rLP_top = $rLP->[$max_lp_stack];
my $lp_object = $rLP_top->[_lp_object_];
if ($lp_object) {
( $space_count, $current_level, $current_ci_level ) =
@{ $lp_object->get_spaces_level_ci() };
}
else {
$current_ci_level = $rLP_top->[_lp_ci_level_];
$current_level = $rLP_top->[_lp_level_];
$space_count = $rLP_top->[_lp_space_count_];
}
$stack_changed = 0;
}
#------------------------------------------------------------
# Break at a previous '=' if necessary to control line length
#------------------------------------------------------------
if ( $type eq '{' || $type eq '(' ) {
$lp_comma_count{ $total_depth + 1 } = 0;
$lp_arrow_count{ $total_depth + 1 } = 0;
# If we come to an opening token after an '=' token of some
# type, see if it would be helpful to 'break' after the '=' to
# save space
my $ii_last_equals = $last_lp_equals{$total_depth};
if ($ii_last_equals) {
$self->lp_equals_break_check( $ii, $ii_last_equals );
}
}
#------------------------
# Handle decreasing depth
#------------------------
# Note that one token may have both decreasing and then increasing
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
# in this example we would first go back to (1,0) then up to (2,0)
# in a single call.
if ( $level < $current_level || $ci_level < $current_ci_level ) {
$self->lp_decreasing_depth($ii);
}
#------------------------
# handle increasing depth
#------------------------
if ( $level > $current_level || $ci_level > $current_ci_level ) {
$self->lp_increasing_depth($ii);
}
#------------------
# Handle all tokens
#------------------
if ( $type ne 'b' ) {
# Count commas and look for non-list characters. Once we see a
# non-list character, we give up and don't look for any more
# commas.
if ( $type eq '=>' ) {
$lp_arrow_count{$total_depth}++;
# remember '=>' like '=' for estimating breaks (but see
# above note for b1035)
$last_lp_equals{$total_depth} = $ii;
}
elsif ( $type eq ',' ) {
$lp_comma_count{$total_depth}++;
}
elsif ( $is_assignment{$type} ) {
$last_lp_equals{$total_depth} = $ii;
}
else {
## not a special type
}
# this token might start a new line if ..
if (
$ii > $ii_begin_line
&& (
# this is the first nonblank token of the line
$ii == 1 && $types_to_go[0] eq 'b'
# or previous character was one of these:
# /^([\:\?\,f])$/
|| $hash_test2{$last_nonblank_type}
# or previous character was opening and this is not
# closing
|| ( $last_nonblank_type eq '{' && $type ne '}' )
|| ( $last_nonblank_type eq '(' and $type ne ')' )
# or this token is one of these:
# /^([\.]|\|\||\&\&)$/
|| $hash_test3{$type}
# or this is a closing structure
|| ( $last_nonblank_type eq '}'
&& $last_nonblank_token eq $last_nonblank_type )
# or previous token was keyword 'return'
|| (
$last_nonblank_type eq 'k'
&& ( $last_nonblank_token eq 'return'
&& $type ne '{' )
)
# or starting a new line at certain keywords is fine
|| ( $type eq 'k'
&& $is_if_unless_and_or_last_next_redo_return{
$token} )
# or this is after an assignment after a closing
# structure
|| (
$is_assignment{$last_nonblank_type}
&& (
# /^[\}\)\]]$/
$hash_test1{$last_last_nonblank_type}
# and it is significantly to the right
|| $lp_position_predictor > (
$maximum_line_length_at_level[$level] -
$rOpts_maximum_line_length / 2
)
)
)
)
)
{
check_for_long_gnu_style_lines($ii);
$ii_begin_line = $ii;
# back up 1 token if we want to break before that type
# otherwise, we may strand tokens like '?' or ':' on a line
if ( $ii_begin_line > 0 ) {
my $wbb =
$last_nonblank_type eq 'k'
? $want_break_before{$last_nonblank_token}
: $want_break_before{$last_nonblank_type};
$ii_begin_line-- if ($wbb);
}
}
$K_last_nonblank = $K_to_go[$ii];
$last_last_nonblank_type = $last_nonblank_type;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
} ## end if ( $type ne 'b' )
# remember the predicted position of this token on the output line
if ( $ii > $ii_begin_line ) {
## NOTE: this is a critical loop - the following call has been
## expanded for about 2x speedup:
## $lp_position_predictor =
## total_line_length( $ii_begin_line, $ii );
my $indentation = $leading_spaces_to_go[$ii_begin_line];
if ( ref($indentation) ) {
$indentation = $indentation->get_spaces();
}
$lp_position_predictor =
$indentation +
$summed_lengths_to_go[ $ii + 1 ] -
$summed_lengths_to_go[$ii_begin_line];
}
else {
$lp_position_predictor =
$space_count + $token_lengths_to_go[$ii];
}
# Store the indentation object for this token.
# This allows us to manipulate the leading whitespace
# (in case we have to reduce indentation to fit a line) without
# having to change any token values.
#---------------------------------------------------------------
# replace leading whitespace with indentation objects where used
#---------------------------------------------------------------
if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
$lp_object_count_this_batch++;
my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
$leading_spaces_to_go[$ii] = $lp_object;
if ( $max_lp_stack > 0
&& $ci_level
&& $rLP->[ $max_lp_stack - 1 ]->[_lp_object_] )
{
$reduced_spaces_to_go[$ii] =
$rLP->[ $max_lp_stack - 1 ]->[_lp_object_];
}
else {
$reduced_spaces_to_go[$ii] = $lp_object;
}
}
} ## end loop over all tokens in this batch
undo_incomplete_lp_indentation()
if ( !$rOpts_extended_line_up_parentheses );
return $lp_object_count_this_batch;
} ## end sub set_lp_indentation
sub lp_equals_break_check {
my ( $self, $ii, $ii_last_equals ) = @_;
# If we come to an opening token after an '=' token of some
# type, see if it would be helpful to 'break' after the '=' to
# save space.
# Given:
# $ii = index of an opening token in the output batch
# $ii_begin_line = index of token starting next output line
# Update:
# $lp_position_predictor - updated position predictor
# $ii_begin_line = updated starting token index
# Skip an empty set of parens, such as after channel():
# my $exchange = $self->_channel()->exchange(
# This fixes issues b1318 b1322 b1323 b1328
my $is_empty_container;
if ( $ii_last_equals && $ii < $max_index_to_go ) {
my $seqno = $type_sequence_to_go[$ii];
my $inext_nb = $ii + 1;
$inext_nb++
if ( $types_to_go[$inext_nb] eq 'b' );
my $seqno_nb = $type_sequence_to_go[$inext_nb];
$is_empty_container = $seqno && $seqno_nb && $seqno_nb == $seqno;
}
if ( $ii_last_equals
&& $ii_last_equals > $ii_begin_line
&& !$is_empty_container )
{
my $seqno = $type_sequence_to_go[$ii];
# find the position if we break at the '='
my $i_test = $ii_last_equals;
# Fix for issue b1229, check if want break before this token
# Fix for issue b1356, if i_test is a blank, the leading spaces may
# be incorrect (if it was an interline blank).
# Fix for issue b1357 .. b1370, i_test must be prev nonblank
# ( the ci value for blanks can vary )
# See also case b223
# Fix for issue b1371-b1374 : all of these and the above are fixed
# by simply backing up one index and setting the leading spaces of
# a blank equal to that of the equals.
if ( $want_break_before{ $types_to_go[$i_test] } ) {
$i_test -= 1;
$leading_spaces_to_go[$i_test] =
$leading_spaces_to_go[$ii_last_equals]
if ( $types_to_go[$i_test] eq 'b' );
}
elsif ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
else {
# no change needed
}
my $test_position = total_line_length( $i_test, $ii );
my $mll = $maximum_line_length_at_level[ $levels_to_go[$i_test] ];
#------------------------------------------------------
# Break if structure will reach the maximum line length
#------------------------------------------------------
# Historically, -lp just used one-half line length here
my $len_increase = $rOpts_maximum_line_length / 2;
# For -xlp, we can also use the pre-computed lengths
my $min_len = $self->[_rcollapsed_length_by_seqno_]->{$seqno};
if ( $min_len && $min_len > $len_increase ) {
$len_increase = $min_len;
}
if (
# if we might exceed the maximum line length
$lp_position_predictor + $len_increase > $mll
# if a -bbx flag WANTS a break before this opening token
|| ( $seqno
&& $self->[_rbreak_before_container_by_seqno_]->{$seqno} )
# or we are beyond the 1/4 point and there was an old
# break at an assignment (not '=>') [fix for b1035]
|| (
$lp_position_predictor >
$mll - $rOpts_maximum_line_length * 3 / 4
&& $types_to_go[$ii_last_equals] ne '=>'
&& (
$old_breakpoint_to_go[$ii_last_equals]
|| ( $ii_last_equals > 0
&& $old_breakpoint_to_go[ $ii_last_equals - 1 ] )
|| ( $ii_last_equals > 1
&& $types_to_go[ $ii_last_equals - 1 ] eq 'b'
&& $old_breakpoint_to_go[ $ii_last_equals - 2 ] )
)
)
)
{
# then make the switch -- note that we do not set a
# real breakpoint here because we may not really need
# one; sub break_lists will do that if necessary.
my $Kc = $self->[_K_closing_container_]->{$seqno};
if (
# For -lp, only if the closing token is in this
# batch (c117). Otherwise it cannot be done by sub
# break_lists.
defined($Kc) && $Kc <= $K_to_go[$max_index_to_go]
# For -xlp, we only need one nonblank token after
# the opening token.
|| $rOpts_extended_line_up_parentheses
)
{
$ii_begin_line = $i_test + 1;
$lp_position_predictor = $test_position;
#--------------------------------------------------
# Fix for an opening container terminating a batch:
#--------------------------------------------------
# To get alignment of a -lp container with its
# contents, we have to put a break after $i_test.
# For $ii<$max_index_to_go, this will be done by
# sub break_lists based on the indentation object.
# But for $ii=$max_index_to_go, the indentation
# object for this seqno will not be created until
# the next batch, so we have to set a break at
# $i_test right now in order to get one.
if ( $ii == $max_index_to_go
&& !$block_type_to_go[$ii]
&& $types_to_go[$ii] eq '{'
&& $seqno
&& !$self->[_ris_excluded_lp_container_]->{$seqno} )
{
$self->set_forced_lp_break( $ii_begin_line, $ii );
}
}
}
}
return;
} ## end sub lp_equals_break_check
sub lp_decreasing_depth {
my ( $self, $ii ) = @_;
# This is called by sub set_lp_indentation for a token at index $ii
# which has a lower nesting depth compared to the previous token.
# We have to update the stack variables for the new indentation.
my $rLL = $self->[_rLL_];
my $level = $levels_to_go[$ii];
my $ci_level = $ci_levels_to_go[$ii];
# loop to decrease $max_lp_stack until we find the first entry at or
# completely below this level
while (1) {
# Be sure we have not hit the stack bottom - should never
# happen because only negative levels can get here, and
# $level was forced to be positive above.
if ( $max_lp_stack <= 0 ) {
# non-fatal, just keep going except in DEVEL_MODE
if (DEVEL_MODE) {
Fault(<<EOM);
program bug with -lp: stack_error. level=$level; ci_level=$ci_level; rerun with -nlp
EOM
}
last;
}
# save index of token which closes this level
if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
my $lp_object = $rLP->[$max_lp_stack]->[_lp_object_];
$lp_object->set_closed($ii);
my $comma_count = 0;
my $arrow_count = 0;
my $type = $types_to_go[$ii];
if ( $type eq '}' || $type eq ')' ) {
my $total_depth = $nesting_depth_to_go[$ii];
$comma_count = $lp_comma_count{$total_depth};
$arrow_count = $lp_arrow_count{$total_depth};
$comma_count = 0 if ( !defined($comma_count) );
$arrow_count = 0 if ( !defined($arrow_count) );
}
$lp_object->set_comma_count($comma_count);
$lp_object->set_arrow_count($arrow_count);
# Undo any extra indentation if we saw no commas
my $available_spaces = $lp_object->get_available_spaces();
my $K_start = $lp_object->get_K_begin_line();
if ( $available_spaces > 0
&& $K_start >= $K_to_go[0]
&& ( $comma_count <= 0 || $arrow_count > 0 ) )
{
my $i = $lp_object->get_lp_item_index();
# Safety check for a valid stack index. It
# should be ok because we just checked that the
# index K of the token associated with this
# indentation is in this batch.
if ( $i < 0 || $i > $max_lp_object_list ) {
my $KK = $K_to_go[$ii];
my $lno = $rLL->[$KK]->[_LINE_INDEX_];
DEVEL_MODE && Fault(<<EOM);
Program bug with -lp near line $lno. Stack index i=$i should be >=0 and <= max=$max_lp_object_list
EOM
last;
}
if ( $arrow_count == 0 ) {
$rlp_object_list->[$i]
->permanently_decrease_available_spaces(
$available_spaces);
}
else {
$rlp_object_list->[$i]
->tentatively_decrease_available_spaces(
$available_spaces);
}
foreach my $j ( $i + 1 .. $max_lp_object_list ) {
$rlp_object_list->[$j]
->decrease_SPACES($available_spaces);
}
}
}
#------------------
# go down one level
#------------------
--$max_lp_stack;
my $rLP_top = $rLP->[$max_lp_stack];
my $ci_lev = $rLP_top->[_lp_ci_level_];
my $lev = $rLP_top->[_lp_level_];
my $spaces = $rLP_top->[_lp_space_count_];
if ( $rLP_top->[_lp_object_] ) {
my $lp_obj = $rLP_top->[_lp_object_];
( $spaces, $lev, $ci_lev ) =
@{ $lp_obj->get_spaces_level_ci() };
}
# stop when we reach a level at or below the current
# level
if ( $lev <= $level && $ci_lev <= $ci_level ) {
$space_count = $spaces;
$current_level = $lev;
$current_ci_level = $ci_lev;
last;
}
} ## end while (1)
return;
} ## end sub lp_decreasing_depth
sub lp_increasing_depth {
my ( $self, $ii ) = @_;
# This is called by sub set_lp_indentation for a token at index $ii
# which has an increasing nesting depth compared to the previous token.
# We have to update the stack variables for the new indentation.
my $rLL = $self->[_rLL_];
my $type = $types_to_go[$ii];
my $level = $levels_to_go[$ii];
my $ci_level = $ci_levels_to_go[$ii];
$stack_changed = 1;
# Compute the standard incremental whitespace. This will be
# the minimum incremental whitespace that will be used. This
# choice results in a smooth transition between the gnu-style
# and the standard style.
my $standard_increment =
( $level - $current_level ) * $rOpts_indent_columns +
( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
# Now we have to define how much extra incremental space
# ("$available_space") we want. This extra space will be
# reduced as necessary when long lines are encountered or when
# it becomes clear that we do not have a good list.
my $available_spaces = 0;
my $align_seqno = 0;
my $K_extra_space;
my $last_nonblank_seqno;
my $last_nonblank_block_type;
if ( defined($K_last_nonblank) ) {
$last_nonblank_seqno = $rLL->[$K_last_nonblank]->[_TYPE_SEQUENCE_];
$last_nonblank_block_type =
$last_nonblank_seqno
? $self->[_rblock_type_of_seqno_]->{$last_nonblank_seqno}
: undef;
}
$in_lp_mode = $rLP->[$max_lp_stack]->[_lp_object_];
#-----------------------------------------------
# Initialize indentation spaces on empty stack..
#-----------------------------------------------
if ( $max_lp_stack == 0 ) {
$space_count = $level * $rOpts_indent_columns;
}
#----------------------------------------
# Add the standard space increment if ...
#----------------------------------------
elsif (
# if this is a BLOCK, add the standard increment
$last_nonblank_block_type
# or if this is not a sequenced item
|| !$last_nonblank_seqno
# or this container is excluded by user rules
# or contains here-docs or multiline qw text
|| defined($last_nonblank_seqno)
&& $self->[_ris_excluded_lp_container_]->{$last_nonblank_seqno}
# or if last nonblank token was not structural indentation
|| $last_nonblank_type ne '{'
# and do not start -lp under stress .. fixes b1244, b1255
|| !$in_lp_mode && $level >= $high_stress_level
)
{
# If we have entered lp mode, use the top lp object to get
# the current indentation spaces because it may have
# changed. Fixes b1285, b1286.
if ($in_lp_mode) {
$space_count = $in_lp_mode->get_spaces();
}
$space_count += $standard_increment;
}
#---------------------------------------------------------------
# -lp mode: try to use space to the first non-blank level change
#---------------------------------------------------------------
else {
# see how much space we have available
my $test_space_count = $lp_position_predictor;
my $excess = 0;
my $min_len =
$self->[_rcollapsed_length_by_seqno_]->{$last_nonblank_seqno};
my $next_opening_too_far;
if ( defined($min_len) ) {
$excess =
$test_space_count +
$min_len -
$maximum_line_length_at_level[$level];
if ( $excess > 0 ) {
$test_space_count -= $excess;
# will the next opening token be a long way out?
$next_opening_too_far =
$lp_position_predictor + $excess >
$maximum_line_length_at_level[$level];
}
}
my $rLP_top = $rLP->[$max_lp_stack];
my $min_gnu_indentation = $rLP_top->[_lp_space_count_];
if ( $rLP_top->[_lp_object_] ) {
$min_gnu_indentation = $rLP_top->[_lp_object_]->get_spaces();
}
$available_spaces = $test_space_count - $min_gnu_indentation;
# Fix for combo -naws and -xlp (b1501; also b1466)
my $tol = !$rOpts_add_whitespace
&& $rOpts_extended_line_up_parentheses ? 1 : 0;
# Do not startup -lp indentation mode if no space ...
# ... or if it puts the opening far to the right
if ( !$in_lp_mode
&& ( $available_spaces <= $tol || $next_opening_too_far ) )
{
$space_count += $standard_increment;
$available_spaces = 0;
}
# Use -lp mode
else {
$space_count = $test_space_count;
$in_lp_mode = 1;
if ( $available_spaces >= $standard_increment ) {
$min_gnu_indentation += $standard_increment;
}
elsif ( $available_spaces > 1 ) {
$min_gnu_indentation += $available_spaces + 1;
# The "+1" space can cause mis-alignment if there is no
# blank space between the opening paren and the next
# nonblank token (i.e., -pt=2) and the container does not
# get broken open. So we will mark this token for later
# space removal by sub 'xlp_tweak' if this container
# remains intact (issue git #106).
if (
$type ne 'b'
# Skip if the maximum line length is exceeded here
&& $excess <= 0
# This is only for level changes, not ci level changes.
# But note: this test is here out of caution but I have
# not found a case where it is actually necessary.
&& $is_opening_token{$last_nonblank_token}
# Be sure we are at consecutive nonblanks. This test
# should be true, but it guards against future coding
# changes to level values assigned to blank spaces.
&& $ii > 0
&& $types_to_go[ $ii - 1 ] ne 'b'
)
{
$K_extra_space = $K_to_go[$ii];
}
}
elsif ( $is_opening_token{$last_nonblank_token} ) {
if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
$min_gnu_indentation += 2;
}
else {
$min_gnu_indentation += 1;
}
}
else {
$min_gnu_indentation += $standard_increment;
}
$available_spaces = $space_count - $min_gnu_indentation;
if ( $available_spaces < 0 ) {
$space_count = $min_gnu_indentation;
$available_spaces = 0;
}
$align_seqno = $last_nonblank_seqno;
}
}
#-------------------------------------------
# update the state, but not on a blank token
#-------------------------------------------
if ( $type ne 'b' ) {
if ( $rLP->[$max_lp_stack]->[_lp_object_] ) {
$rLP->[$max_lp_stack]->[_lp_object_]->set_have_child(1);
$in_lp_mode = 1;
}
#----------------------------------------
# Create indentation object if in lp-mode
#----------------------------------------
++$max_lp_stack;
my $lp_object;
if ($in_lp_mode) {
# A negative level implies not to store the item in the
# item_list
my $lp_item_index = 0;
if ( $level >= 0 ) {
$lp_item_index = ++$max_lp_object_list;
}
my $K_begin_line = 0;
if ( $ii_begin_line >= 0
&& $ii_begin_line <= $max_index_to_go )
{
$K_begin_line = $K_to_go[$ii_begin_line];
}
# Minor Fix: when creating indentation at a side
# comment we don't know what the space to the actual
# next code token will be. We will allow a space for
# sub correct_lp to move it in if necessary.
# NOTE for c314, c400: this fix is not really necessary,
# and it caused a DEVEL_MODE fault when -i=0.
# It could be completely removed, but this would change
# existing formatting in a few cases. So for now, the fix
# is to only skip this if -i=0.
if (
$type eq '#'
&& $max_index_to_go > 0
&& $align_seqno
# fix for c314, c400 (see above note)
&& $rOpts_indent_columns > 0
)
{
$available_spaces += 1;
}
my $standard_spaces = $leading_spaces_to_go[$ii];
$lp_object = Perl::Tidy::IndentationItem->new(
spaces => $space_count,
level => $level,
ci_level => $ci_level,
available_spaces => $available_spaces,
lp_item_index => $lp_item_index,
align_seqno => $align_seqno,
K_begin_line => $K_begin_line,
standard_spaces => $standard_spaces,
K_extra_space => $K_extra_space,
);
DEBUG_LP && do {
my $tok_beg = $rLL->[$K_begin_line]->[_TOKEN_];
my $token = $tokens_to_go[$ii];
print {*STDOUT} <<EOM;
DEBUG_LP: Created object at tok=$token type=$type for seqno $align_seqno level=$level ci=$ci_level spaces=$space_count avail=$available_spaces kbeg=$K_begin_line tokbeg=$tok_beg lp=$lp_position_predictor
EOM
};
if ( $level >= 0 ) {
$rlp_object_list->[$max_lp_object_list] = $lp_object;
}
if ( $is_opening_token{$last_nonblank_token}
&& $last_nonblank_seqno )
{
$self->[_rlp_object_by_seqno_]->{$last_nonblank_seqno} =
$lp_object;
}
}
#------------------------------------
# Store this indentation on the stack
#------------------------------------
$rLP->[$max_lp_stack]->[_lp_ci_level_] = $ci_level;
$rLP->[$max_lp_stack]->[_lp_level_] = $level;
$rLP->[$max_lp_stack]->[_lp_object_] = $lp_object;
$rLP->[$max_lp_stack]->[_lp_container_seqno_] =
$last_nonblank_seqno;
$rLP->[$max_lp_stack]->[_lp_space_count_] = $space_count;
# If the opening paren is beyond the half-line length, then
# we will use the minimum (standard) indentation. This will
# help avoid problems associated with running out of space
# near the end of a line. As a result, in deeply nested
# lists, there will be some indentations which are limited
# to this minimum standard indentation. But the most deeply
# nested container will still probably be able to shift its
# parameters to the right for proper alignment, so in most
# cases this will not be noticeable.
if ( $available_spaces > 0 && $lp_object ) {
my $halfway =
$maximum_line_length_at_level[$level] -
$rOpts_maximum_line_length / 2;
$lp_object->tentatively_decrease_available_spaces(
$available_spaces)
if ( $space_count > $halfway );
}
}
return;
} ## end sub lp_increasing_depth
sub check_for_long_gnu_style_lines {
# Look at the current estimated maximum line length, and
# remove some whitespace if it exceeds the desired maximum
my ($ii_to_go) = @_;
# Given:
# $ii_to_go = index of current token under consideration
# nothing can be done if no stack items defined for this line
return if ( $max_lp_object_list < 0 );
# See if we have exceeded the maximum desired line length ..
# keep 2 extra free because they are needed in some cases
# (result of trial-and-error testing)
my $tol = 2;
# But reduce tol to 0 at a terminal comma; fixes b1432
if ( $types_to_go[$ii_to_go] eq ','
&& $ii_to_go < $max_index_to_go )
{
my $in = $ii_to_go + 1;
if ( $types_to_go[$in] eq 'b' && $in < $max_index_to_go ) { $in++ }
if ( $is_closing_token{ $tokens_to_go[$in] } ) {
$tol = 0;
}
}
my $spaces_needed =
$lp_position_predictor -
$maximum_line_length_at_level[ $levels_to_go[$ii_to_go] ] +
$tol;
return if ( $spaces_needed <= 0 );
# We are over the limit, so try to remove a requested number of
# spaces from leading whitespace. We are only allowed to remove
# from whitespace items created on this batch, since others have
# already been used and cannot be undone.
my @candidates = ();
# loop over all whitespace items created for the current batch
foreach my $i ( 0 .. $max_lp_object_list ) {
my $item = $rlp_object_list->[$i];
# item must still be open to be a candidate (otherwise it
# cannot influence the current token)
next if ( $item->get_closed() >= 0 );
my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
push( @candidates, [ $i, $available_spaces ] );
}
}
return unless (@candidates);
# sort by available whitespace so that we can remove whitespace
# from the maximum available first.
@candidates =
sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @candidates;
# keep removing whitespace until we are done or have no more
foreach my $candidate (@candidates) {
my ( $i, $available_spaces ) = @{$candidate};
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
# remove the incremental space from this item
$rlp_object_list->[$i]->decrease_available_spaces($deleted_spaces);
my $i_debug = $i;
# update the leading whitespace of this item and all items
# that came after it
$i -= 1;
while ( ++$i <= $max_lp_object_list ) {
my $old_spaces = $rlp_object_list->[$i]->get_spaces();
if ( $old_spaces >= $deleted_spaces ) {
$rlp_object_list->[$i]->decrease_SPACES($deleted_spaces);
}
# shouldn't happen except for code bug:
else {
# non-fatal, keep going except in DEVEL_MODE
if (DEVEL_MODE) {
my $level = $rlp_object_list->[$i_debug]->get_level();
my $ci_level =
$rlp_object_list->[$i_debug]->get_ci_level();
my $old_level = $rlp_object_list->[$i]->get_level();
my $old_ci_level_uu =
$rlp_object_list->[$i]->get_ci_level();
Fault(<<EOM);
program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level
EOM
}
}
} ## end while ( ++$i <= $max_lp_object_list)
$lp_position_predictor -= $deleted_spaces;
$spaces_needed -= $deleted_spaces;
last if ( $spaces_needed <= 0 );
}
return;
} ## end sub check_for_long_gnu_style_lines
sub undo_incomplete_lp_indentation {
#------------------------------------------------------------------
# Undo indentation for all incomplete -lp indentation levels of the
# current batch unless -xlp is set.
#------------------------------------------------------------------
# This routine is called once after each output stream batch is
# finished to undo indentation for all incomplete -lp indentation
# levels. If this routine is called then comments and blank lines will
# disrupt this indentation style. In older versions of perltidy this
# was always done because it could cause problems otherwise, but recent
# improvements allow fairly good results to be obtained by skipping
# this step with the -xlp flag.
# nothing to do if no stack items defined for this line
return if ( $max_lp_object_list < 0 );
# loop over all whitespace items created for the current batch
foreach my $i ( 0 .. $max_lp_object_list ) {
my $item = $rlp_object_list->[$i];
# only look for open items
next if ( $item->get_closed() >= 0 );
# Tentatively remove all of the available space
# (The vertical aligner will try to get it back later)
my $available_spaces = $item->get_available_spaces();
if ( $available_spaces > 0 ) {
# delete incremental space for this item
$rlp_object_list->[$i]
->tentatively_decrease_available_spaces($available_spaces);
# Reduce the total indentation space of any nodes that follow
# Note that any such nodes must necessarily be dependents
# of this node.
foreach ( $i + 1 .. $max_lp_object_list ) {
$rlp_object_list->[$_]->decrease_SPACES($available_spaces);
}
}
}
return;
} ## end sub undo_incomplete_lp_indentation
} ## end closure set_lp_indentation
#----------------------------------------------------------------------
# sub to set a requested break before an opening container in -lp mode.
#----------------------------------------------------------------------
sub set_forced_lp_break {
my ( $self, $i_begin_line, $i_opening ) = @_;
# Given:
# $i_begin_line = index of break in the _to_go arrays
# $i_opening = index of the opening container
# Set any requested break at a token before this opening container
# token. This is often an '=' or '=>' but can also be things like
# '.', ',', 'return'. It was defined by sub set_lp_indentation.
# Important:
# For intact containers, call this at the closing token.
# For broken containers, call this at the opening token.
# This will avoid needless breaks when it turns out that the
# container does not actually get broken. This isn't known until
# the closing container for intact blocks.
return
if ( $i_begin_line < 0
|| $i_begin_line > $max_index_to_go );
# Handle request to put a break break immediately before this token.
# We may not want to do that since we are also breaking after it.
if ( $i_begin_line == $i_opening ) {
# The following rules should be reviewed. We may want to always
# allow the break. If we do not do the break, the indentation
# may be off.
# RULE: don't break before it unless it is welded to a qw.
# This works well, but we may want to relax this to allow
# breaks in additional cases.
return
if ( !$self->[_rK_weld_right_]->{ $K_to_go[$i_opening] } );
return unless ( $types_to_go[$max_index_to_go] eq 'q' );
}
# Only break for breakpoints at the same
# indentation level as the opening paren
my $test1 = $nesting_depth_to_go[$i_opening];
my $test2 = $nesting_depth_to_go[$i_begin_line];
return if ( $test2 != $test1 );
# Back up at a blank (fixes case b932)
my $ibr = $i_begin_line - 1;
if ( $ibr > 0
&& $types_to_go[$ibr] eq 'b' )
{
$ibr--;
}
if ( $ibr >= 0 ) {
my $i_nonblank = $self->set_forced_breakpoint($ibr);
# Crude patch to prevent sub recombine_breakpoints from undoing
# this break, especially after an '='. It will leave old
# breakpoints alone. See c098/x045 for some examples.
if ( defined($i_nonblank) ) {
$old_breakpoint_to_go[$i_nonblank] = 1;
}
}
return;
} ## end sub set_forced_lp_break
sub reduce_lp_indentation {
my ( $self, $i, $spaces_wanted ) = @_;
# Reduce the leading whitespace at token $i if possible by $spaces_wanted
# (a large value of $spaces_wanted will remove all excess space)
# NOTE: to be called from break_lists only for a sequence of tokens
# contained between opening and closing parens/braces/brackets
my $deleted_spaces = 0;
my $item = $leading_spaces_to_go[$i];
my $available_spaces = $item->get_available_spaces();
if (
$available_spaces > 0
&& ( ( $spaces_wanted <= $available_spaces )
|| !$item->get_have_child() )
)
{
# we'll remove these spaces, but mark them as recoverable
$deleted_spaces =
$item->tentatively_decrease_available_spaces($spaces_wanted);
}
return $deleted_spaces;
} ## end sub reduce_lp_indentation
###########################################################
# CODE SECTION 13: Preparing batches for vertical alignment
###########################################################
sub check_convey_batch_input {
my ($self) = @_;
# Check for valid input to sub convey_batch_to_vertical_aligner. An
# error here would most likely be due to an error in the calling
# routine 'sub grind_batch_of_CODE'.
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# $ri_first = ref to list of starting line indexes in _to_go arrays
# $ri_last = ref to list of ending line indexes in _to_go arrays
if ( !defined($ri_first) || !defined($ri_last) ) {
Fault(<<EOM);
Undefined line ranges ri_first and/r ri_last
EOM
}
my $nmax = @{$ri_first} - 1;
my $nmax_check = @{$ri_last} - 1;
if ( $nmax < 0 || $nmax_check < 0 || $nmax != $nmax_check ) {
Fault(<<EOM);
Line range index error: nmax=$nmax but nmax_check=$nmax_check
These should be equal and >=0
EOM
}
my ( $ibeg, $iend );
foreach my $n ( 0 .. $nmax ) {
my $iend_m = $iend;
$ibeg = $ri_first->[$n];
$iend = $ri_last->[$n];
if ( $ibeg < 0 || $iend < $ibeg || $iend > $max_index_to_go ) {
Fault(<<EOM);
Bad line range at line index $n of $nmax: ibeg=$ibeg, iend=$iend
These should have iend >= ibeg and be in the range (0..$max_index_to_go)
EOM
}
next if ( $n == 0 );
if ( $ibeg <= $iend_m ) {
Fault(<<EOM);
Line ranges overlap: iend=$iend_m at line $n-1 but ibeg=$ibeg for line $n
EOM
}
}
return;
} ## end sub check_convey_batch_input
sub convey_batch_to_vertical_aligner {
my ($self) = @_;
# This routine receives a batch of code for which the final line breaks
# have been defined. Here we prepare the lines for passing to the vertical
# aligner. We do the following tasks:
# - mark certain vertical alignment tokens, such as '=', in each line
# - make final indentation adjustments
# - do logical padding: insert extra blank spaces to help display certain
# logical constructions
# - send the line to the vertical aligner
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
my $ris_list_by_seqno = $self->[_ris_list_by_seqno_];
my $this_batch = $self->[_this_batch_];
my $do_not_pad = $this_batch->[_do_not_pad_];
my $batch_CODE_type = $this_batch->[_batch_CODE_type_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
$self->check_convey_batch_input() if (DEVEL_MODE);
my $n_last_line = @{$ri_first} - 1;
my $ibeg = $ri_first->[0];
my $iend = $ri_last->[0];
my $type_beg = $types_to_go[$ibeg];
my $type_end = $types_to_go[$iend];
my $token_beg = $tokens_to_go[$ibeg];
my $rindentation_list = [0]; # ref to indentations for each line
my ( $cscw_block_comment, $closing_side_comment, $is_block_comment,
$is_HSC );
if ( !$max_index_to_go
&& $type_beg eq '#' )
{
if ( $batch_CODE_type && $batch_CODE_type eq 'HSC' ) { $is_HSC = 1 }
else { $is_block_comment = 1 }
}
if ($rOpts_closing_side_comments) {
( $closing_side_comment, $cscw_block_comment ) =
$self->add_closing_side_comment();
}
# for multi-line batches ...
if ( $n_last_line > 0 ) {
# undo continuation indentation
$self->undo_ci();
# flush before a long if statement to avoid unwanted alignment
$self->flush_vertical_aligner()
if ( $type_beg eq 'k'
&& $is_if_unless{$token_beg} );
$self->set_logical_padding()
if ($rOpts_logical_padding);
$self->xlp_tweak()
if ($rOpts_extended_line_up_parentheses);
}
# -xci must undo continuation indentation even for single lines
elsif ($rOpts_extended_continuation_indentation) {
$self->undo_ci();
}
else {
# ok: single line, no -xci
}
if (DEVEL_MODE) { $self->check_batch_summed_lengths() }
# ----------------------------------------------------------
# define the vertical alignments for all lines of this batch
# ----------------------------------------------------------
my $rline_alignments;
# Quick handling of lines with a single tokens
if ( !$max_index_to_go ) {
# Hanging side comment
if ($is_HSC) {
$rline_alignments = make_HSC_vertical_alignments();
}
# All Other single tokens
# = [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
else {
$rline_alignments = [
[
[],
[ $tokens_to_go[0] ],
[ $types_to_go[0] ],
[ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ],
]
];
}
}
# Multiple tokens
else {
$rline_alignments = $self->make_vertical_alignments();
}
# ----------------------------------------------
# loop to send each line to the vertical aligner
# ----------------------------------------------
my ( $type_beg_last, $type_end_last, $ibeg_next, $iend_next, $ljump );
foreach my $nline ( 0 .. $n_last_line ) {
# ----------------------------------------------------------------
# This hash will hold the args for vertical alignment of this line
# We will populate it as we go.
# ----------------------------------------------------------------
my $rvao_args = {};
if ( $nline > 0 ) {
$type_beg_last = $type_beg;
$type_end_last = $type_end;
$ibeg = $ibeg_next;
$iend = $iend_next;
$type_beg = $types_to_go[$ibeg];
$type_end = $types_to_go[$iend];
$token_beg = $tokens_to_go[$ibeg];
}
my $Kbeg = $K_to_go[$ibeg];
my $Kend = $K_to_go[$iend];
if ( $nline < $n_last_line ) {
$ibeg_next = $ri_first->[ $nline + 1 ];
$iend_next = $ri_last->[ $nline + 1 ];
my $Kbeg_next = $K_to_go[$ibeg_next];
$ljump =
$rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
elsif ( !$is_block_comment && $Kend < $Klimit ) {
# Patch for git #51, a bare closing qw paren was not outdented
# if the flag '-nodelete-old-newlines is set
# Note that we are just looking ahead for the next nonblank
# character. We could scan past an arbitrary number of block
# comments or hanging side comments by calling K_next_code, but it
# could add significant run time with very little to be gained.
my $Kbeg_next = $Kend + 1;
if ( $Kbeg_next < $Klimit
&& $rLL->[$Kbeg_next]->[_TYPE_] eq 'b' )
{
$Kbeg_next += 1;
}
$ljump =
$rLL->[$Kbeg_next]->[_LEVEL_] - $rLL->[$Kend]->[_LEVEL_];
}
else {
$ljump = 0;
}
# ---------------------------------------------------
# Define the check value 'Kend' to send for this line
# ---------------------------------------------------
# The 'Kend' value is an integer for checking that lines come out of
# the far end of the pipeline in the right order. It increases
# linearly along the token stream. But we only send ending K values of
# non-comments down the pipeline. This is equivalent to checking that
# the last CODE_type is blank or equal to 'VER'. See also sub
# resync_lines_and_tokens for related coding. Note that
# '$batch_CODE_type' is the code type of the line to which the ending
# token belongs.
if ( !$batch_CODE_type || $batch_CODE_type eq 'VER' ) {
$rvao_args->{Kend} = $Kend;
}
# ---------------------------------------------
# get the vertical alignment info for this line
# ---------------------------------------------
# The lines are broken into fields which can be spaced by the vertical
# to achieve vertical alignment. These fields are the actual text
# which will be output, so from here on no more changes can be made to
# the text.
my $rline_alignment = $rline_alignments->[$nline];
# Programming check: (shouldn't happen)
# The number of tokens which separate the fields must always be
# one less than the number of fields. If this is not true then
# an error has been introduced in sub make_alignment_patterns.
if (DEVEL_MODE) {
my ( $rtokens, $rfields, $rpatterns_uu, $rfield_lengths_uu ) =
@{$rline_alignment};
if ( @{$rfields} && ( @{$rtokens} != ( @{$rfields} - 1 ) ) ) {
my $nt = @{$rtokens};
my $nf = @{$rfields};
my $msg = <<EOM;
Program bug in Perl::Tidy::Formatter, probably in sub 'make_alignment_patterns':
The number of tokens = $nt should be one less than number of fields: $nf
EOM
Fault($msg);
}
}
# --------------------------------------
# get the final indentation of this line
# --------------------------------------
my (
$indentation,
$lev,
$level_end,
$i_terminal,
$is_outdented_line,
) = $self->get_final_indentation(
$ibeg,
$iend,
$rindentation_list,
$ljump,
);
# --------------------------------
# define flag 'outdent_long_lines'
# --------------------------------
if (
# we will allow outdenting of long lines..
# which are long quotes, if allowed
( $type_beg eq 'Q' && $rOpts_outdent_long_quotes )
# which are long block comments, if allowed
|| (
$type_beg eq '#'
&& $rOpts_outdent_long_comments
# but not if this is a static block comment
&& !$this_batch->[_is_static_block_comment_]
)
)
{
$rvao_args->{outdent_long_lines} = 1;
# convert -lp indentation objects to spaces to allow outdenting
if ( ref($indentation) ) {
$indentation = $indentation->get_spaces();
}
}
# --------------------------------------------------
# define flags 'break_alignment_before' and '_after'
# --------------------------------------------------
# These flags tell the vertical aligner to stop alignment before or
# after this line.
if ($is_outdented_line) {
$rvao_args->{break_alignment_before} = 1;
$rvao_args->{break_alignment_after} = 1;
}
elsif ($do_not_pad) {
$rvao_args->{break_alignment_before} = 1;
}
# flush at an 'if' which follows a line with (1) terminal semicolon
# or (2) terminal block_type which is not an 'if'. This prevents
# unwanted alignment between the lines.
elsif ( $token_beg eq 'if' && $type_beg eq 'k' ) {
my $type_m = 'b';
my $block_type_m;
if ( $Kbeg > 0 ) {
my $Km = $Kbeg - 1;
$type_m = $rLL->[$Km]->[_TYPE_];
if ( $type_m eq 'b' && $Km > 0 ) {
$Km -= 1;
$type_m = $rLL->[$Km]->[_TYPE_];
}
if ( $type_m eq '#' && $Km > 0 ) {
$Km -= 1;
$type_m = $rLL->[$Km]->[_TYPE_];
if ( $type_m eq 'b' && $Km > 0 ) {
$Km -= 1;
$type_m = $rLL->[$Km]->[_TYPE_];
}
}
my $seqno_m = $rLL->[$Km]->[_TYPE_SEQUENCE_];
if ($seqno_m) {
$block_type_m = $self->[_rblock_type_of_seqno_]->{$seqno_m};
}
}
# break after anything that is not if-like
if (
$type_m eq ';'
|| ( $type_m eq '}'
&& $block_type_m
&& $block_type_m ne 'if'
&& $block_type_m ne 'unless'
&& $block_type_m ne 'elsif'
&& $block_type_m ne 'else' )
)
{
$rvao_args->{break_alignment_before} = 1;
}
}
else {
# do not need to break vertical alignment here
}
# ----------------------------------
# define 'rvertical_tightness_flags'
# ----------------------------------
# These flags tell the vertical aligner if/when to combine consecutive
# lines, based on the user input parameters.
if ( !$is_block_comment
&& !$self->[_no_vertical_tightness_flags_] )
{
$rvao_args->{rvertical_tightness_flags} =
$self->set_vertical_tightness_flags( $nline,
$closing_side_comment );
}
# ----------------------------------
# define 'is_terminal_ternary' flag
# ----------------------------------
# This flag is set at the final ':' of a ternary chain to request
# vertical alignment of the final term. Here is a slightly complex
# example:
#
# $self->{_text} = (
# !$section ? ''
# : $type eq 'item' ? "the $section entry"
# : "the section on $section"
# )
# . (
# $page
# ? ( $section ? ' in ' : '' ) . "the $page$page_ext manpage"
# : ' elsewhere in this document'
# );
#
if ( $type_beg eq ':' || $nline > 0 && $type_end_last eq ':' ) {
my $is_terminal_ternary = 0;
my $last_leading_type = $nline > 0 ? $type_beg_last : ':';
my $terminal_type = $types_to_go[$i_terminal];
if ( $terminal_type ne ';'
&& $n_last_line > $nline
&& $level_end == $lev )
{
my $Kbeg_next = $K_to_go[$ibeg_next];
$level_end = $rLL->[$Kbeg_next]->[_LEVEL_];
$terminal_type = $rLL->[$Kbeg_next]->[_TYPE_];
}
if (
$last_leading_type eq ':'
&& ( ( $terminal_type eq ';' && $level_end <= $lev )
|| ( $terminal_type ne ':' && $level_end < $lev ) )
)
{
# the terminal term must not contain any ternary terms, as in
# my $ECHO = (
# $Is_MSWin32 ? ".\\echo$$"
# : $Is_MacOS ? ":echo$$"
# : ( $Is_NetWare ? "echo$$" : "./echo$$" )
# );
$is_terminal_ternary = 1;
my $rK_next_seqno_by_K = $self->[_rK_next_seqno_by_K_];
my $KP = $Kbeg;
my $Knext_last = $KP;
while ( defined( $KP = $rK_next_seqno_by_K->[$KP] ) ) {
if ( $KP <= $Knext_last ) {
## shouldn't happen: $rK_next_seqno_by_K is corrupted
DEVEL_MODE && Fault(<<EOM);
Knext should not increase: Knext_last=$Knext_last >= Knext=$KP
EOM
last;
}
$Knext_last = $KP;
last if ( $KP > $Kend );
my $type_KP = $rLL->[$KP]->[_TYPE_];
if ( $type_KP eq '?' || $type_KP eq ':' ) {
$is_terminal_ternary = 0;
last;
}
} ## end while ( defined( $KP = $rK_next_seqno_by_K...))
}
$rvao_args->{is_terminal_ternary} = $is_terminal_ternary;
}
# -------------------------------------------------
# add any new closing side comment to the last line
# -------------------------------------------------
if ( $closing_side_comment && $nline == $n_last_line ) {
my ( $rtokens, $rfields, $rpatterns, $rfield_lengths ) =
@{$rline_alignment};
if ( @{$rfields} ) {
$rfields->[-1] .= " $closing_side_comment";
# NOTE: Patch for csc. We can just use 1 for the length of the csc
# because its length should not be a limiting factor from here on.
$rfield_lengths->[-1] += 2;
# repack
$rline_alignment =
[ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
}
}
# ------------------------
# define flag 'list_seqno'
# ------------------------
# This flag indicates if this line is contained in a multi-line list
if ( !$is_block_comment ) {
my $parent_seqno = $parent_seqno_to_go[$ibeg];
$rvao_args->{list_seqno} = $ris_list_by_seqno->{$parent_seqno};
}
# The alignment tokens have been marked with nesting_depths, so we need
# to pass nesting depths to the vertical aligner. They remain invariant
# under all formatting operations. Previously, level values were sent
# to the aligner. But they can be altered in welding and other
# operations, and this can lead to alignment errors.
my $nesting_depth_beg = $nesting_depth_to_go[$ibeg];
my $nesting_depth_end = $nesting_depth_to_go[$iend];
# A quirk in the definition of nesting depths is that the closing token
# has the same depth as internal tokens. The vertical aligner is
# programmed to expect them to have the lower depth, so we fix this.
if ( $is_closing_type{ $types_to_go[$ibeg] } ) { $nesting_depth_beg-- }
if ( $is_closing_type{ $types_to_go[$iend] } ) { $nesting_depth_end-- }
# Adjust nesting depths to keep -lp indentation for qw lists. This is
# required because qw lists contained in brackets do not get nesting
# depths, but the vertical aligner is watching nesting depth changes to
# decide if a -lp block is intact. Without this patch, qw lists
# enclosed in angle brackets will not get the correct -lp indentation.
# Looking for line with isolated qw ...
if ( $rOpts_line_up_parentheses
&& $type_beg eq 'q'
&& $ibeg == $iend )
{
# ... which is part of a multiline qw
my $Km = $self->K_previous_nonblank($Kbeg);
my $Kp = $self->K_next_nonblank($Kbeg);
if ( defined($Km) && $rLL->[$Km]->[_TYPE_] eq 'q'
|| defined($Kp) && $rLL->[$Kp]->[_TYPE_] eq 'q' )
{
$nesting_depth_beg++;
$nesting_depth_end++;
}
}
# ---------------------------------
# define flag 'forget_side_comment'
# ---------------------------------
# This flag tells the vertical aligner to reset the side comment
# location if we are entering a new block from level 0. This is
# intended to keep side comments from drifting too far to the right.
if ( $block_type_to_go[$i_terminal]
&& $nesting_depth_end > $nesting_depth_beg )
{
$rvao_args->{forget_side_comment} =
!$self->[_radjusted_levels_]->[$Kbeg];
}
# -----------------------------------
# Store the remaining non-flag values
# -----------------------------------
$rvao_args->{ci_level} = $ci_levels_to_go[$ibeg];
$rvao_args->{indentation} = $indentation;
$rvao_args->{level_end} = $nesting_depth_end;
$rvao_args->{level} = $nesting_depth_beg;
$rvao_args->{rline_alignment} = $rline_alignment;
$rvao_args->{maximum_line_length} =
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ];
# --------------------------------------
# send this line to the vertical aligner
# --------------------------------------
my $vao = $self->[_vertical_aligner_object_];
$vao->valign_input($rvao_args);
$do_not_pad = 0;
} ## end of loop to output each line
# Set flag indicating if the last line ends in an opening
# token and is very short, so that a blank line is not
# needed if the subsequent line is a comment.
# Examples of what we are looking for:
# {
# && (
# BEGIN {
# default {
# sub {
$self->[_last_output_short_opening_token_]
# line ends in opening token
# /^[\{\(\[L]$/
= $is_opening_type{$type_end}
# and either
&& (
# line has either single opening token
$iend == $ibeg
# or is a single token followed by opening token.
# Note that sub identifiers have blanks like 'sub doit'
# $token_beg !~ /\s+/
|| ( $iend - $ibeg <= 2 && index( $token_beg, SPACE ) < 0 )
)
# and limit total to 10 character widths
&& token_sequence_length( $ibeg, $iend ) <= 10;
# Remember indentation of lines containing opening containers for
# later use by sub get_final_indentation
$self->save_opening_indentation($rindentation_list)
if ( $this_batch->[_runmatched_opening_indexes_]
|| $types_to_go[$max_index_to_go] eq 'q' );
# Output any new -cscw block comment
if ($cscw_block_comment) {
$self->flush_vertical_aligner();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
return;
} ## end sub convey_batch_to_vertical_aligner
sub check_batch_summed_lengths {
my ( $self, ($msg) ) = @_;
# Debug routine for summed lengths
# $msg = optional debug message
$msg = EMPTY_STRING unless defined($msg);
my $rLL = $self->[_rLL_];
# Verify that the summed lengths are correct. We want to be sure that
# errors have not been introduced by programming changes. Summed lengths
# are defined in sub store_token. Operations like padding and unmasking
# semicolons can change token lengths, but those operations are expected to
# update the summed lengths when they make changes. So the summed lengths
# should always be correct.
foreach my $i ( 0 .. $max_index_to_go ) {
my $len_by_sum =
$summed_lengths_to_go[ $i + 1 ] - $summed_lengths_to_go[$i];
my $len_tok_i = $token_lengths_to_go[$i];
my $KK = $K_to_go[$i];
my $len_tok_K;
# For --indent-only, there is not always agreement between
# token lengths in _rLL_ and token_lengths_to_go, so skip that check.
if ( defined($KK) && !$rOpts_indent_only ) {
$len_tok_K = $rLL->[$KK]->[_TOKEN_LENGTH_];
}
if ( $len_by_sum != $len_tok_i
|| defined($len_tok_K) && $len_by_sum != $len_tok_K )
{
my $lno = defined($KK) ? $rLL->[$KK]->[_LINE_INDEX_] + 1 : "undef";
$KK = 'undef' unless defined($KK);
my $tok = $tokens_to_go[$i];
my $type = $types_to_go[$i];
Fault(<<EOM);
Summed lengths are appear to be incorrect. $msg
lengths disagree: token length by sum=$len_by_sum but token_length_to_go[$i] = $len_tok_i and rLL->[$KK]->[_TOKEN_LENGTH_]=$len_tok_K
near line $lno starting with '$tokens_to_go[0]..' at token i=$i K=$KK token_type='$type' token='$tok'
EOM
}
}
return;
} ## end sub check_batch_summed_lengths
{ ## begin closure set_vertical_alignment_markers
my %is_vertical_alignment_type;
my %is_not_vertical_alignment_token;
my %is_vertical_alignment_keyword;
my %is_terminal_alignment_type;
my %is_low_level_alignment_token;
BEGIN {
my @q;
# Replaced =~ and // in the list. // had been removed in RT 119588
@q = qw#
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
{ ? : => && || ~~ !~~ =~ !~ // <=> ->
#;
@is_vertical_alignment_type{@q} = (1) x scalar(@q);
# These 'tokens' are not aligned. We need this to remove [
# from the above list because it has type ='{'
@q = qw( [ );
@is_not_vertical_alignment_token{@q} = (1) x scalar(@q);
# these are the only types aligned at a line end
@q = qw( && || => );
@is_terminal_alignment_type{@q} = (1) x scalar(@q);
# these tokens only align at line level
@q = ( '{', '(' );
@is_low_level_alignment_token{@q} = (1) x scalar(@q);
# eq and ne were removed from this list to improve alignment chances
@q = qw( if unless and or err for foreach while until );
@is_vertical_alignment_keyword{@q} = (1) x scalar(@q);
} ## end BEGIN
# These are the main return variables. They are closure variables
# for efficient access by sub .._token_loop needs.
my $ralignment_type_to_go;
my $ralignment_counts;
sub set_vertical_alignment_markers {
my ($self) = @_;
# This routine looks at all output lines of a batch for certain tokens
# which can serve as vertical alignment markers (such as an '=').
# $ri_first = ref to list of starting line indexes in _to_go arrays
# $ri_last = ref to list of ending line indexes in _to_go arrays
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# Method: We look at each token $i in this output batch and set
# $ralignment_type_to_go->[$i] equal to those tokens at which we would
# accept vertical alignment.
#----------------------------
# Initialize return variables
#----------------------------
$ralignment_type_to_go = [];
$ralignment_counts = [];
# NOTE: closing side comments can insert up to 2 additional tokens
# beyond the original $max_index_to_go, so we need to check ri_last for
# the last index.
my $max_line = @{$ri_first} - 1;
my $max_i = $ri_last->[$max_line];
if ( $max_i < $max_index_to_go ) { $max_i = $max_index_to_go }
# -----------------------------------------------------------------
# Shortcut:
# - no alignments if there is only 1 token.
# - and nothing to do if we aren't allowed to change whitespace.
# -----------------------------------------------------------------
if ( $max_i <= 0 || !$rOpts_add_whitespace ) {
goto RETURN;
}
# -------------------------------
# First handle any side comment.
# -------------------------------
my $i_terminal = $max_i;
if ( $types_to_go[$max_i] eq '#' ) {
# We know $max_i > 0 if we get here.
$i_terminal -= 1;
if ( $i_terminal > 0 && $types_to_go[$i_terminal] eq 'b' ) {
$i_terminal -= 1;
}
my $token = $tokens_to_go[$max_i];
my $KK = $K_to_go[$max_i];
my $is_closing_block = $types_to_go[$i_terminal] eq '}'
&& $tokens_to_go[$i_terminal] eq '}';
# Patch to check for asub closing side comments (c380)
# These follow '};' rather than bare '}'
$is_closing_block ||=
$types_to_go[$i_terminal] eq ';'
&& $i_terminal == $inext_to_go[0]
&& $types_to_go[0] eq '}'
&& $tokens_to_go[0] eq '}';
# Do not align various special side comments
my $do_not_align = (
# it is any specially marked side comment
( defined($KK) && $self->[_rspecial_side_comment_type_]->{$KK} )
# or it is a static side comment
|| ( $rOpts->{'static-side-comments'}
&& $token =~ /$static_side_comment_pattern/ )
# or a closing side comment
|| ( $is_closing_block
&& $token =~ /$closing_side_comment_prefix_pattern/ )
);
# - For the specific combination -vc -nvsc, we put all side comments
# at fixed locations. Note that we will lose hanging side comment
# alignments. Otherwise, hsc's can move to strange locations.
# - For -nvc -nvsc we make all side comments vertical alignments
# because the vertical aligner will check for -nvsc and be able
# to reduce the final padding to the side comments for long lines.
# and keep hanging side comments aligned.
if ( !$do_not_align
&& !$rOpts_valign_side_comments
&& $rOpts_valign_code )
{
$do_not_align = 1;
my $ipad = $max_i - 1;
if ( $types_to_go[$ipad] eq 'b' ) {
my $pad_spaces =
$rOpts->{'minimum-space-to-comment'} -
$token_lengths_to_go[$ipad];
$self->pad_token( $ipad, $pad_spaces );
}
}
if ( !$do_not_align ) {
$ralignment_type_to_go->[$max_i] = '#';
$ralignment_counts->[$max_line]++;
}
}
# ----------------------------------------------
# Nothing more to do on this line if -nvc is set
# ----------------------------------------------
if ( !$rOpts_valign_code ) {
goto RETURN;
}
# -------------------------------------
# Loop over each line of this batch ...
# -------------------------------------
foreach my $nline ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$nline];
my $iend = $ri_last->[$nline];
next if ( $iend <= $ibeg );
# back up before any side comment
if ( $iend > $i_terminal ) { $iend = $i_terminal }
#----------------------------------
# Loop over all tokens on this line
#----------------------------------
$self->set_vertical_alignment_markers_token_loop( $nline, $ibeg,
$iend );
}
RETURN:
return ( $ralignment_type_to_go, $ralignment_counts );
} ## end sub set_vertical_alignment_markers
my %is_dot_question_colon;
BEGIN {
my @q = qw( . ? : );
@is_dot_question_colon{@q} = (1) x scalar(@q);
}
sub set_vertical_alignment_markers_token_loop {
my ( $self, $nline, $ibeg, $iend ) = @_;
# Input parameters:
# $nline = index of this line in the current batch
# $ibeg, $iend = index range of tokens to check in the _to_go arrays
# Task:
# Set vertical alignment markers for the tokens on one line
# of the current output batch. This is done by updating the
# three closure variables needed by sub 'make_alignment_patterns':
# $ralignment_type_to_go - alignment type of tokens, like '=', if any
# $ralignment_counts - number of alignment tokens in the line
my $level_beg = $levels_to_go[$ibeg];
my $token_beg = $tokens_to_go[$ibeg];
my $type_beg = $types_to_go[$ibeg];
my $last_vertical_alignment_BEFORE_index = -1;
my $vert_last_nonblank_type = $type_beg;
# ----------------------------------------------------------------
# Initialization code merged from 'sub delete_needless_alignments'
# ----------------------------------------------------------------
my $i_good_paren = -1;
my $i_elsif_close = $ibeg - 1;
my $i_elsif_open = $iend + 1;
my @imatch_list;
if ( $type_beg eq 'k' ) {
# Initialization for paren patch: mark a location of a paren we
# should keep, such as one following something like a leading
# 'if', 'elsif',
$i_good_paren = $ibeg + 1;
if ( $types_to_go[$i_good_paren] eq 'b' ) {
$i_good_paren++;
}
# Initialization for 'elsif' patch: remember the paren range of
# an elsif, and do not make alignments within them because this
# can cause loss of padding and overall brace alignment in the
# vertical aligner.
if ( $token_beg eq 'elsif'
&& $i_good_paren < $iend
&& $tokens_to_go[$i_good_paren] eq '(' )
{
$i_elsif_open = $i_good_paren;
$i_elsif_close = $mate_index_to_go[$i_good_paren];
if ( !defined($i_elsif_close) ) { $i_elsif_close = -1 }
}
}
# --------------------------------------------
# Loop over each token in this output line ...
# --------------------------------------------
my $type;
foreach my $i ( $ibeg + 1 .. $iend ) {
next if ( ( $type = $types_to_go[$i] ) eq 'b' );
my $token = $tokens_to_go[$i];
my $alignment_type;
# ----------------------------------------------
# Check for 'paren patch' : Remove excess parens
# ----------------------------------------------
# Excess alignment of parens can prevent other good alignments.
# For example, note the parens in the first two rows of the
# following snippet. They would normally get marked for
# alignment and aligned as follows:
# my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
# my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
# my $img = new Gimp::Image( $w, $h, RGB );
# This causes unnecessary paren alignment and prevents the
# third equals from aligning. If we remove the unwanted
# alignments we get:
# my $w = $columns * $cell_w + ( $columns + 1 ) * $border;
# my $h = $rows * $cell_h + ( $rows + 1 ) * $border;
# my $img = new Gimp::Image( $w, $h, RGB );
# A rule for doing this which works well is to remove alignment
# of parens whose containers do not contain other aligning
# tokens, with the exception that we always keep alignment of
# the first opening paren on a line (for things like 'if' and
# 'elsif' statements).
if ( $token eq ')' && @imatch_list ) {
# undo the corresponding opening paren if:
# - it is at the top of the stack
# - and not the first overall opening paren
# - does not follow a leading keyword on this line
my $imate = $mate_index_to_go[$i];
if ( !defined($imate) ) { $imate = -1 }
if ( $imatch_list[-1] eq $imate
&& ( $ibeg > 1 || @imatch_list > 1 )
&& $imate > $i_good_paren )
{
if ( $ralignment_type_to_go->[$imate] ) {
$ralignment_type_to_go->[$imate] = EMPTY_STRING;
$ralignment_counts->[$nline]--;
}
pop @imatch_list;
}
}
# do not align tokens at lower level than start of line
# except for side comments
if ( $levels_to_go[$i] < $level_beg ) {
next;
}
#--------------------------------------------------------
# First see if we want to align BEFORE this token
#--------------------------------------------------------
# The first possible token that we can align before
# is index 2 because: 1) it doesn't normally make sense to
# align before the first token and 2) the second
# token must be a blank if we are to align before
# the third
if ( $i < $ibeg + 2 ) { }
# must follow a blank token
elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
# otherwise, do not align two in a row to create a
# blank field
elsif ( $last_vertical_alignment_BEFORE_index == $i - 2 ) { }
# align before one of these keywords
# (within a line, since $i>1)
elsif ( $type eq 'k' ) {
# /^(if|unless|and|or|eq|ne)$/
if ( $is_vertical_alignment_keyword{$token} ) {
$alignment_type = $token;
# Align postfix 'unless' and 'if' if requested (git #116)
# These are the only equivalent keywords. For equivalent
# token types see '%operator_map'.
if ( $token eq 'unless' && $rOpts_valign_if_unless ) {
$alignment_type = 'if';
}
}
}
# align qw in a 'use' statement (issue git #93)
elsif ( $type eq 'q' ) {
if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] eq 'use' ) {
$alignment_type = $type;
}
}
# align before one of these types..
elsif ( $is_vertical_alignment_type{$type}
&& !$is_not_vertical_alignment_token{$token} )
{
$alignment_type = $token;
if ( $rOpts_valign_wide_equals && $is_assignment{$type} ) {
$alignment_type = '=';
}
# Do not align a terminal token. Although it might
# occasionally look ok to do this, this has been found to be
# a good general rule. The main problems are:
# (1) that the terminal token (such as an = or :) might get
# moved far to the right where it is hard to see because
# nothing follows it, and
# (2) doing so may prevent other good alignments.
# Current exceptions are && and || and =>
if ( $i == $iend ) {
$alignment_type = EMPTY_STRING
unless ( $is_terminal_alignment_type{$type} );
}
# Do not align leading ': (' or '. ('. This would prevent
# alignment in something like the following:
# $extra_space .=
# ( $input_line_number < 10 ) ? " "
# : ( $input_line_number < 100 ) ? " "
# : "";
# or
# $code =
# ( $case_matters ? $accessor : " lc($accessor) " )
# . ( $yesno ? " eq " : " ne " )
# Also, do not align a ( following a leading ? so we can
# align something like this:
# $converter{$_}->{ushortok} =
# $PDL::IO::Pic::biggrays
# ? ( m/GIF/ ? 0 : 1 )
# : ( m/GIF|RAST|IFF/ ? 0 : 1 );
if ( $i == $ibeg + 2
&& $is_dot_question_colon{$type_beg}
&& $types_to_go[ $i - 1 ] eq 'b' )
{
$alignment_type = EMPTY_STRING;
}
# Certain tokens only align at the same level as the
# initial line level
if ( $is_low_level_alignment_token{$token}
&& $levels_to_go[$i] != $level_beg )
{
$alignment_type = EMPTY_STRING;
}
if ( $token eq '(' ) {
# For a paren after keyword, only align if-like parens,
# such as:
# if ( $a ) { &a }
# elsif ( $b ) { &b }
# ^-------------------aligned parens
if ( $vert_last_nonblank_type eq 'k'
&& !$is_if_unless_elsif{ $tokens_to_go[ $i - 2 ] } )
{
$alignment_type = EMPTY_STRING;
}
# Do not align a spaced-function-paren if requested.
# Issue git #53, #73.
if ( !$rOpts_function_paren_vertical_alignment ) {
my $seqno = $type_sequence_to_go[$i];
$alignment_type = EMPTY_STRING
if ( $self->[_ris_function_call_paren_]->{$seqno} );
}
# make () align with qw in a 'use' statement (git #93)
if ( $tokens_to_go[0] eq 'use'
&& $types_to_go[0] eq 'k'
&& defined( $mate_index_to_go[$i] )
&& $mate_index_to_go[$i] == $i + 1 )
{
$alignment_type = 'q';
## Note on discussion git #101. We could make this
## a separate type '()' to separate it from qw's:
## $alignment_type =
## $rOpts_valign_empty_parens_with_qw ? 'q' : '()';
}
}
# be sure the alignment tokens are unique
# This experiment didn't work well: reason not determined
# if ($token ne $type) {$alignment_type .= $type}
}
# make qw() functions using -qwaf align 'use' statement
elsif ( $type eq 'U' ) {
if ( $types_to_go[0] eq 'k'
&& $tokens_to_go[0] eq 'use'
&& substr( $token, 0, 2 ) eq 'qw' )
{
$alignment_type = 'q';
}
}
else {
## not a special type
}
# NOTE: This is deactivated because it causes the previous
# if/elsif alignment to fail
#elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
#{ $alignment_type = $type; }
if ($alignment_type) {
$last_vertical_alignment_BEFORE_index = $i;
}
#--------------------------------------------------------
# Next see if we want to align AFTER the previous nonblank
#--------------------------------------------------------
# We want to line up ',' and interior ';' tokens, with the added
# space AFTER these tokens. (Note: interior ';' is included
# because it may occur in short blocks).
else {
if (
# previous token IS one of these:
(
$vert_last_nonblank_type eq ','
|| $vert_last_nonblank_type eq ';'
)
# and it follows a blank
&& $types_to_go[ $i - 1 ] eq 'b'
# and it's NOT one of these
&& !$is_closing_token{$type}
# then go ahead and align
)
{
$alignment_type = $vert_last_nonblank_type;
}
}
#-----------------------
# Set the alignment type
#-----------------------
if ($alignment_type) {
# but do not align the opening brace of an anonymous sub
if ( $token eq '{'
&& $block_type_to_go[$i]
&& $matches_ASUB{ $block_type_to_go[$i] } )
{
}
# and do not make alignments within 'elsif' parens
elsif ( $i > $i_elsif_open && $i < $i_elsif_close ) {
}
# and ignore any tokens which have leading padded spaces
# example: perl527/lop.t
elsif ( substr( $alignment_type, 0, 1 ) eq SPACE ) {
}
else {
$ralignment_type_to_go->[$i] = $alignment_type;
$ralignment_counts->[$nline]++;
push @imatch_list, $i;
}
}
$vert_last_nonblank_type = $type;
}
return;
} ## end sub set_vertical_alignment_markers_token_loop
} ## end closure set_vertical_alignment_markers
sub make_HSC_vertical_alignments {
# This is the alignment for a hanging side comment
my $rline_alignments;
#--------------------------------------
# Case 1: no alignments if -naws is set
#--------------------------------------
if ( !$rOpts_add_whitespace ) {
# Nothing to do if we are not allowed to add whitespace
$rline_alignments = [
[
[], [ SPACE . $tokens_to_go[0] ],
['#'],
[ 1 + $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ],
]
];
}
#-----------------------
# Case 2: -nvsc flag set
#-----------------------
# - For the specific combination -vc -nvsc, we put all side comments
# at fixed locations. Note that we will lose hanging side comment
# alignments. Otherwise, hsc's can move to strange locations.
# - For -nvc -nvsc we make all side comments vertical alignments
# because the vertical aligner will check for -nvsc and be able
# to reduce the final padding to the side comments for long lines.
# and keep hanging side comments aligned.
elsif ( !$rOpts_valign_side_comments && $rOpts_valign_code ) {
my $pad_spaces = $rOpts->{'minimum-space-to-comment'};
$rline_alignments = [
[
[],
[ SPACE x $pad_spaces . $tokens_to_go[0] ],
['q'],
[
$pad_spaces +
$summed_lengths_to_go[1] -
$summed_lengths_to_go[0]
],
]
];
}
#--------------------------------------
# Case 3: Normal case of no constraints
#--------------------------------------
# Originally, a hanging side comment line was constructed as three tokens:
# type 'q' with zero length,
# type 'b' with length 1
# type '#' with the text of the comment
# In this way, the comment became a true side comment through all of the
# tokenization operations. However, this caused a problem (c269) with subs
# K_next_* and K_previous_*, which would stop at the 'q' token. Rather
# than change those to skip an empty 'q', the hanging side comment was
# left as a block comment but the line was marked as 'HSC'. Only when
# we make the vertical alignments, right here, do we need to construct
# the artificial 'q', 'b', '#' sequence for the vertical aligner.
else {
$rline_alignments = [
[
['#'],
[ SPACE, $tokens_to_go[0] ],
[ 'qb', '#' ],
[ 1, $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ],
]
];
}
return $rline_alignments;
} ## end sub make_HSC_vertical_alignments
sub make_vertical_alignments {
my ($self) = @_;
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# Given:
# $ri_first = ref to list of starting line indexes in _to_go arrays
# $ri_last = ref to list of ending line indexes in _to_go arrays
#----------------------------
# Shortcut for a single token
#----------------------------
if ( $max_index_to_go == 0 ) {
if ( @{$ri_first} == 1 && $ri_last->[0] == 0 ) {
my $rtokens = [];
my $rfields = [ $tokens_to_go[0] ];
my $rpatterns = [ $types_to_go[0] ];
my $rfield_lengths =
[ $summed_lengths_to_go[1] - $summed_lengths_to_go[0] ];
return [ [ $rtokens, $rfields, $rpatterns, $rfield_lengths ] ];
}
# Strange line packing, not fatal but should not happen
else {
if (DEVEL_MODE) {
my $max_line = @{$ri_first} - 1;
my $ibeg = $ri_first->[0];
my $iend = $ri_last->[0];
my $tok_b = $tokens_to_go[$ibeg];
my $tok_e = $tokens_to_go[$iend];
my $type_b = $types_to_go[$ibeg];
my $type_e = $types_to_go[$iend];
Fault(
"Strange..max_index=0 but nlines=$max_line ibeg=$ibeg tok=$tok_b type=$type_b iend=$iend tok=$tok_e type=$type_e; please check\n"
);
}
}
}
#---------------------------------------------------------
# Step 1: Define the alignment tokens for the entire batch
#---------------------------------------------------------
my ( $ralignment_type_to_go, $ralignment_counts );
# We only need to make this call if vertical alignment of code is
# requested or if a line might have a side comment.
if ( $rOpts_valign_code
|| $types_to_go[$max_index_to_go] eq '#' )
{
( $ralignment_type_to_go, $ralignment_counts ) =
$self->set_vertical_alignment_markers();
}
#----------------------------------------------
# Step 2: Break each line into alignment fields
#----------------------------------------------
my $rline_alignments = [];
my $max_line = @{$ri_first} - 1;
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
my $rtok_fld_pat_len = $self->make_alignment_patterns(
$ibeg,
$iend,
$ralignment_type_to_go,
$ralignment_counts->[$line],
);
push @{$rline_alignments}, $rtok_fld_pat_len;
}
return $rline_alignments;
} ## end sub make_vertical_alignments
sub get_seqno {
my ( $self, $ii ) = @_;
# Get opening and closing sequence numbers of a token for the vertical
# aligner. Assign qw quotes a value to allow qw opening and closing tokens
# to be treated somewhat like opening and closing tokens for stacking
# tokens by the vertical aligner.
# Given:
# $ii = index of token in the output batch
my $rLL = $self->[_rLL_];
my $KK = $K_to_go[$ii];
my $seqno = $rLL->[$KK]->[_TYPE_SEQUENCE_];
if ( $rLL->[$KK]->[_TYPE_] eq 'q' ) {
my $SEQ_QW = -1;
my $token = $rLL->[$KK]->[_TOKEN_];
if ( $ii > 0 ) {
$seqno = $SEQ_QW if ( $token =~ /^qw\s*[\(\{\[]/ );
}
else {
# _ending_in_quote_ = true if line ends in quote
if ( !$self->[_this_batch_]->[_ending_in_quote_] ) {
$seqno = $SEQ_QW if ( $token =~ /[\)\}\]]$/ );
}
}
}
return ($seqno);
} ## end sub get_seqno
sub undo_contained_ci {
my ( $self, $ri_first, $ri_last ) = @_;
# Given:
# $ri_first = ref to list of starting line indexes in _to_go arrays
# $ri_last = ref to list of ending line indexes in _to_go arrays
# Undo ci for a sequence of lines in a container which all have both ci
# and a jump in level. Written for issue git #137. This mainly occurs
# in code with very long quotes when -nolq is set. Examples:
# diag( 'Test run performed at: '
# . DateTime->now
# . ' with Moose '
# . ( Moose->VERSION || 'git repo' ) );
# $d = sqrt( ( $x->[$x_l] - $x->[$x_r] )**2 +
# ( $y->[$x_l] - $y->[$x_r] )**2 );
# These all involve lines with ci within a complete container, where the
# batch ends in ');' or '];' or '};' with possible side comment. The
# opening container token does not end a line, and this causes the double
# jump.
my $max_line = @{$ri_first} - 1;
return if ( $max_line < 1 );
my $ibeg_max = $ri_first->[$max_line];
my $iend_max = $ri_last->[$max_line];
my $i_opening;
my $line_last;
# Look for Case 1: last line begins with ');'
if ( $is_closing_token{ $tokens_to_go[$ibeg_max] } ) {
my $i_n = $inext_to_go[$ibeg_max];
return if ( $i_n < $ibeg_max || $i_n > $iend_max );
return if ( $types_to_go[$i_n] ne ';' );
$i_opening = $mate_index_to_go[$ibeg_max];
return if ( !defined($i_opening) || $i_opening <= 0 );
$line_last = $max_line - 1;
}
# Look for Case 2: last line has some text which ends with ');'
else {
my $i_t = $iend_max;
if ( $types_to_go[$i_t] eq '#' ) {
$i_t = iprev_to_go($i_t);
}
return if ( $i_t <= $ibeg_max );
return if ( $types_to_go[$i_t] ne ';' );
$i_t = iprev_to_go($i_t);
return if ( $i_t <= $ibeg_max );
return if ( !$is_closing_token{ $tokens_to_go[$i_t] } );
$i_opening = $mate_index_to_go[$i_t];
return if ( !defined($i_opening) || $i_opening < 0 );
$line_last = $max_line;
}
# Scan backwards to the line with the opening container,
# looking for a set of lines with ci to remove which have
# the same level and ci as the final line of the group
my $ibeg_last = $ri_first->[$line_last];
my $level_last = $levels_to_go[$ibeg_last];
return unless ( $ci_levels_to_go[$ibeg_last] );
# do not change ci under -lp control
return if ( ref( $reduced_spaces_to_go[$ibeg_last] ) );
my $line_start = $line_last;
foreach my $line ( reverse( 0 .. $line_last ) ) {
my $ibeg = $ri_first->[$line];
return if ( ref( $reduced_spaces_to_go[$ibeg] ) );
last if ( !$ci_levels_to_go[$ibeg] );
last if ( $levels_to_go[$ibeg] != $level_last );
$line_start = $line;
}
# There must be a jump in level and ci from the line before the start,
# and it must contain the opening container token.
my $line_o = $line_start - 1;
return if ( $line_o < 0 );
my $ibeg_o = $ri_first->[$line_o];
my $iend_o = $ri_last->[$line_o];
return if ( $ci_levels_to_go[$ibeg_o] );
return if ( $levels_to_go[$ibeg_o] >= $level_last );
return if ( $i_opening < $ibeg_o || $i_opening > $iend_o );
# ok to undo the ci of this group
foreach my $line_t ( $line_start .. $line_last ) {
my $ibeg_t = $ri_first->[$line_t];
$ci_levels_to_go[$ibeg_t] = 0;
$leading_spaces_to_go[$ibeg_t] = $reduced_spaces_to_go[$ibeg_t];
}
return;
} ## end sub undo_contained_ci
{
my %undo_extended_ci;
sub initialize_undo_ci {
%undo_extended_ci = ();
return;
}
sub undo_ci {
# Undo continuation indentation in certain sequences
my ($self) = @_;
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
my $rix_seqno_controlling_ci =
$this_batch->[_rix_seqno_controlling_ci_];
# Given:
# $ri_first = ref to list of starting line indexes in _to_go arrays
# $ri_last = ref to list of ending line indexes in _to_go arrays
# $rix_seqno_controlling_ci = a control array
my ( $line_1, $line_2 );
my $max_line = @{$ri_first} - 1;
my $rseqno_controlling_my_ci = $self->[_rseqno_controlling_my_ci_];
# Prepare a list of controlling indexes for each line if required.
# This is used for efficient processing below. Note: this is
# critical for speed. In the initial implementation I just looped
# through the @$rix_seqno_controlling_ci list below. Using NYT_prof, I
# found that this routine was causing a huge run time in large lists.
# On a very large list test case, this new coding dropped the run time
# of this routine from 30 seconds to 169 milliseconds.
my @i_controlling_ci;
if ( $rix_seqno_controlling_ci && @{$rix_seqno_controlling_ci} ) {
my @tmp = reverse @{$rix_seqno_controlling_ci};
my $ix_next = pop @tmp;
foreach my $line ( 0 .. $max_line ) {
my $iend = $ri_last->[$line];
while ( defined($ix_next) && $ix_next <= $iend ) {
push @{ $i_controlling_ci[$line] }, $ix_next;
$ix_next = pop @tmp;
}
}
}
# Loop over all lines of the batch ...
# Workaround originally created for problem c007, in which the
# combination -lp -xci could produce a "Program bug" message in unusual
# circumstances.
my $skip_SECTION_1;
if ( $rOpts_line_up_parentheses
&& $rOpts_extended_continuation_indentation )
{
# Only set this flag if -lp is actually used here
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
if ( ref( $leading_spaces_to_go[$ibeg] ) ) {
$skip_SECTION_1 = 1;
last;
}
}
}
my $line_double_jump;
foreach my $line ( 0 .. $max_line ) {
my $ibeg = $ri_first->[$line];
my $iend = $ri_last->[$line];
#-----------------------------------
# SECTION 1: Undo needless common CI
#-----------------------------------
# We are looking at leading tokens and looking for a sequence all
# at the same level and all at a higher level than enclosing lines.
# For example, we can undo continuation indentation in sort/map/grep
# chains
# my $dat1 = pack( "n*",
# map { $_, $lookup->{$_} }
# sort { $a <=> $b }
# grep { $lookup->{$_} ne $default } keys %$lookup );
# to become
# my $dat1 = pack( "n*",
# map { $_, $lookup->{$_} }
# sort { $a <=> $b }
# grep { $lookup->{$_} ne $default } keys %$lookup );
if ( $line && !$skip_SECTION_1 ) {
my $ibeg_last = $ri_first->[ $line - 1 ];
my $lev = $levels_to_go[$ibeg];
my $lev_last = $levels_to_go[$ibeg_last];
# set flag for calling undo_contained_ci
if ( $lev == $lev_last + 1
&& $ci_levels_to_go[$ibeg]
&& !$ci_levels_to_go[$ibeg_last] )
{
$line_double_jump = $line;
}
# if we have started a chain..
if ($line_1) {
# see if it continues..
if ( $lev == $lev_last ) {
if ( $types_to_go[$ibeg] eq 'k'
&& $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
{
# chain continues...
# check for chain ending at end of a statement
my $is_semicolon_terminated = (
$line == $max_line
&& (
$types_to_go[$iend] eq ';'
# with possible side comment
|| ( $types_to_go[$iend] eq '#'
&& $iend - $ibeg >= 2
&& $types_to_go[ $iend - 2 ] eq ';'
&& $types_to_go[ $iend - 1 ] eq 'b' )
)
);
$line_2 = $line
if ($is_semicolon_terminated);
}
else {
# kill chain
$line_1 = undef;
}
}
elsif ( $lev < $lev_last ) {
# chain ends with previous line
$line_2 = $line - 1;
}
else {
# ( $lev > $lev_last )
# kill chain
$line_1 = undef;
}
# undo the continuation indentation if a chain ends
if ( defined($line_2) && defined($line_1) ) {
my $continuation_line_count = $line_2 - $line_1 + 1;
@ci_levels_to_go[ @{$ri_first}[ $line_1 .. $line_2 ] ]
= (0) x ($continuation_line_count)
if ( $continuation_line_count >= 0 );
@leading_spaces_to_go[ @{$ri_first}
[ $line_1 .. $line_2 ] ] =
@reduced_spaces_to_go[ @{$ri_first}
[ $line_1 .. $line_2 ] ];
$line_1 = undef;
}
}
# not in a chain yet..
else {
# look for start of a new sort/map/grep chain
if ( $lev > $lev_last ) {
if ( $types_to_go[$ibeg] eq 'k'
&& $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
{
$line_1 = $line;
}
}
}
}
#-------------------------------------
# SECTION 2: Undo ci at cuddled blocks
#-------------------------------------
# Note that sub get_final_indentation will be called later to
# actually do this, but for now we will tentatively mark cuddled
# lines with ci=0 so that the the -xci loop which follows will be
# correct at cuddles.
if (
$types_to_go[$ibeg] eq '}'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
)
{
my $terminal_type = $types_to_go[$iend];
if ( $terminal_type eq '#' && $iend > $ibeg ) {
$terminal_type = $types_to_go[ $iend - 1 ];
if ( $terminal_type eq '#' && $iend - 1 > $ibeg ) {
$terminal_type = $types_to_go[ $iend - 2 ];
}
}
# Patch for rt144979, part 2. Coordinated with part 1.
# Skip cuddled braces.
my $seqno_beg = $type_sequence_to_go[$ibeg];
my $is_cuddled_closing_brace = $seqno_beg
&& $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
if ( $terminal_type eq '{' && !$is_cuddled_closing_brace ) {
$ci_levels_to_go[$ibeg] = 0;
}
}
#--------------------------------------------------------
# SECTION 3: Undo ci set by sub extended_ci if not needed
#--------------------------------------------------------
# Undo the ci of the leading token if its controlling token
# went out on a previous line without ci
if ( $ci_levels_to_go[$ibeg] ) {
my $Kbeg = $K_to_go[$ibeg];
my $seqno = $rseqno_controlling_my_ci->{$Kbeg};
if ( $seqno && $undo_extended_ci{$seqno} ) {
# but do not undo ci set by the -lp flag
if ( !ref( $reduced_spaces_to_go[$ibeg] ) ) {
$ci_levels_to_go[$ibeg] = 0;
$leading_spaces_to_go[$ibeg] =
$reduced_spaces_to_go[$ibeg];
}
}
}
# Flag any controlling opening tokens in lines without ci. This
# will be used later in the above if statement to undo the ci which
# they added. The array i_controlling_ci[$line] was prepared at
# the top of this routine.
if ( !$ci_levels_to_go[$ibeg]
&& defined( $i_controlling_ci[$line] ) )
{
foreach my $i ( @{ $i_controlling_ci[$line] } ) {
my $seqno = $type_sequence_to_go[$i];
$undo_extended_ci{$seqno} = 1;
}
}
}
#-------------------------------------
# Undo ci in containers if -mci is set
#-------------------------------------
if ( $line_double_jump && $rOpts_minimize_continuation_indentation ) {
$self->undo_contained_ci( $ri_first, $ri_last );
}
return;
} ## end sub undo_ci
}
{ ## begin closure set_logical_padding
my %is_math_op;
BEGIN {
my @q = qw( + - * / );
@is_math_op{@q} = (1) x scalar(@q);
}
sub set_logical_padding {
my ($self) = @_;
# Look at a batch of lines and see if extra padding can improve the
# alignment when there are certain leading operators. Here is an
# example, in which some extra space is introduced before
# '( $year' to make it line up with the subsequent lines:
#
# if ( ( $Year < 1601 )
# || ( $Year > 2899 )
# || ( $EndYear < 1601 )
# || ( $EndYear > 2899 ) )
# {
# &Error_OutOfRange;
# }
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
my $starting_in_quote = $this_batch->[_starting_in_quote_];
my $max_line = @{$ri_first} - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $pad_spaces,
$tok_next, $type_next, $has_leading_op_next, $has_leading_op );
# Patch to produce padding in the first line of short code blocks.
# This is part of an update to fix cases b562 .. b983.
# This is needed to compensate for a change which was made in 'sub
# starting_one_line_block' to prevent blinkers. Previously, that sub
# would not look at the total block size and rely on sub
# break_long_lines to break up long blocks. Consequently, the
# first line of those batches would end in the opening block brace of a
# sort/map/grep/eval block. When this was changed to immediately check
# for blocks which were too long, the opening block brace would go out
# in a single batch, and the block contents would go out as the next
# batch. This caused the logic in this routine which decides if the
# first line should be padded to be incorrect. To fix this, we set a
# flag if the previous batch ended in an opening sort/map/grep/eval
# block brace, and use it to adjust the logic to compensate.
# For example, the following would have previously been a single batch
# but now is two batches. We want to pad the line starting in '$dir':
# my (@indices) = # batch n-1 (prev batch n)
# sort { # batch n-1 (prev batch n)
# $dir eq 'left' # batch n
# ? $cells[$a] <=> $cells[$b] # batch n
# : $cells[$b] <=> $cells[$a]; # batch n
# } ( 0 .. $#cells ); # batch n
my $rLL = $self->[_rLL_];
my $rblock_type_of_seqno = $self->[_rblock_type_of_seqno_];
my $is_short_block;
if ( $K_to_go[0] > 0 ) {
my $Kp = $K_to_go[0] - 1;
if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
$Kp -= 1;
}
if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
$Kp -= 1;
if ( $Kp > 0 && $rLL->[$Kp]->[_TYPE_] eq 'b' ) {
$Kp -= 1;
}
}
my $seqno = $rLL->[$Kp]->[_TYPE_SEQUENCE_];
if ($seqno) {
my $block_type = $rblock_type_of_seqno->{$seqno};
if ($block_type) {
$is_short_block = $is_sort_map_grep_eval{$block_type};
$is_short_block ||= $want_one_line_block{$block_type};
}
}
}
# looking at each line of this batch..
foreach my $line ( 0 .. $max_line - 1 ) {
# see if the next line begins with a logical operator
$ibeg = $ri_first->[$line];
$iend = $ri_last->[$line];
$ibeg_next = $ri_first->[ $line + 1 ];
$tok_next = $tokens_to_go[$ibeg_next];
$type_next = $types_to_go[$ibeg_next];
$has_leading_op_next = ( $type_next eq 'k' )
? $is_chain_operator{$tok_next} # and, or
: $is_chain_operator{$type_next}; # + - * / : ? && ||
# Fix for git134
if ( !$has_leading_op_next
&& $iend > $ibeg + 2
&& $types_to_go[ $ibeg + 1 ] eq 'b'
&& $is_opening_type{ $types_to_go[$ibeg] }
&& $nesting_depth_to_go[$iend] > $nesting_depth_to_go[$ibeg] )
{
my $iend_next = $ri_last->[ $line + 1 ];
$self->pad_broken_list( $ibeg, $iend, $ibeg_next, $iend_next );
}
next unless ($has_leading_op_next);
# next line must not be at lesser depth
next
if ( $nesting_depth_to_go[$ibeg] >
$nesting_depth_to_go[$ibeg_next] );
# identify the token in this line to be padded on the left
$ipad = undef;
# handle lines at same depth...
if ( $nesting_depth_to_go[$ibeg] ==
$nesting_depth_to_go[$ibeg_next] )
{
# if this is not first line of the batch ...
if ( $line > 0 ) {
# and we have leading operator..
next if $has_leading_op;
# Introduce padding if..
# 1. the previous line is at lesser depth, or
# 2. the previous line ends in an assignment
# 3. the previous line ends in a 'return'
# 4. the previous line ends in a comma
# Example 1: previous line at lesser depth
# if ( ( $Year < 1601 ) # <- we are here but
# || ( $Year > 2899 ) # list has not yet
# || ( $EndYear < 1601 ) # collapsed vertically
# || ( $EndYear > 2899 ) )
# {
#
# Example 2: previous line ending in assignment:
# $leapyear =
# $year % 4 ? 0 # <- We are here
# : $year % 100 ? 1
# : $year % 400 ? 0
# : 1;
#
# Example 3: previous line ending in comma:
# push @expr,
# /test/ ? undef
# : eval($_) ? 1
# : eval($_) ? 1
# : 0;
# be sure levels agree (never indent after an indented 'if')
next
if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
# allow padding on first line after a comma but only if:
# (1) this is line 2 and
# (2) there are at more than three lines and
# (3) lines 3 and 4 have the same leading operator
# These rules try to prevent padding within a long
# comma-separated list.
my $ok_comma;
if ( $types_to_go[$iendm] eq ','
&& $line == 1
&& $max_line > 2 )
{
my $ibeg_next_next = $ri_first->[ $line + 2 ];
my $tok_next_next = $tokens_to_go[$ibeg_next_next];
$ok_comma = $tok_next_next eq $tok_next;
}
my $ok_pad = (
$is_assignment{ $types_to_go[$iendm] }
|| $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
|| ( $types_to_go[$iendm] eq 'k'
&& $tokens_to_go[$iendm] eq 'return' )
);
next if ( !$ok_pad );
# we will add padding before the first token
$ipad = $ibeg;
}
# for first line of the batch..
else {
# WARNING: Never indent if first line is starting in a
# continued quote, which would change the quote.
next if $starting_in_quote;
# if this is text after closing '}'
# then look for an interior token to pad
if ( $types_to_go[$ibeg] eq '}' ) {
}
# otherwise, we might pad if it looks really good
elsif ($is_short_block) {
$ipad = $ibeg;
}
else {
# we might pad token $ibeg, so be sure that it
# is at the same depth as the next line.
next
if ( $nesting_depth_to_go[$ibeg] !=
$nesting_depth_to_go[$ibeg_next] );
# We can pad on line 1 of a statement if at least 3
# lines will be aligned. Otherwise, it
# can look very confusing.
# We have to be careful not to pad if there are too few
# lines. The current rule is:
# (1) in general we require at least 3 consecutive lines
# with the same leading chain operator token,
# (2) but an exception is that we only require two lines
# with leading colons if there are no more lines. For example,
# the first $i in the following snippet would get padding
# by the second rule:
#
# $i == 1 ? ( "First", "Color" )
# : $i == 2 ? ( "Then", "Rarity" )
# : ( "Then", "Name" );
next if ( $max_line <= 1 );
my $leading_token = $tokens_to_go[$ibeg_next];
my $tokens_differ;
# never indent line 1 of a '.' series because
# previous line is most likely at same level.
# TODO: we should also look at the leading_spaces
# of the last output line and skip if it is same
# as this line.
next if ( $leading_token eq '.' );
my $count = 1;
foreach my $l ( 2 .. 3 ) {
last if ( $line + $l > $max_line );
$count++;
my $ibeg_next_next = $ri_first->[ $line + $l ];
next
if ( $tokens_to_go[$ibeg_next_next] eq
$leading_token );
$tokens_differ = 1;
last;
}
next if ($tokens_differ);
next if ( $count < 3 && $leading_token ne ':' );
$ipad = $ibeg;
}
}
}
# find interior token to pad if necessary
if ( !defined($ipad) ) {
foreach my $i ( $ibeg .. $iend - 1 ) {
# find any unclosed container
next
if ( !$type_sequence_to_go[$i]
|| !defined( $mate_index_to_go[$i] )
|| $mate_index_to_go[$i] <= $iend );
# find next nonblank token to pad
$ipad = $inext_to_go[$i];
last if $ipad;
}
last if ( !$ipad || $ipad > $iend );
}
# We cannot pad the first leading token of a file because
# it could cause a bug in which the starting indentation
# level is guessed incorrectly each time the code is run
# though perltidy, thus causing the code to march off to
# the right. For example, the following snippet would have
# this problem:
## ov_method mycan( $package, '(""' ), $package
## or ov_method mycan( $package, '(0+' ), $package
## or ov_method mycan( $package, '(bool' ), $package
## or ov_method mycan( $package, '(nomethod' ), $package;
# If this snippet is within a block this won't happen
# unless the user just processes the snippet alone within
# an editor. In that case either the user will see and
# fix the problem or it will be corrected next time the
# entire file is processed with perltidy.
my $peak_batch_size = $this_batch->[_peak_batch_size_];
next if ( $ipad == 0 && $peak_batch_size <= 1 );
# next line must not be at greater depth
my $iend_next = $ri_last->[ $line + 1 ];
next
if ( $nesting_depth_to_go[ $iend_next + 1 ] >
$nesting_depth_to_go[$ipad] );
# lines must be somewhat similar to be padded..
my $inext_next = $inext_to_go[$ibeg_next];
my $type = $types_to_go[$ipad];
# see if there are multiple continuation lines
my $logical_continuation_lines = 1;
if ( $line + 2 <= $max_line ) {
my $leading_token = $tokens_to_go[$ibeg_next];
my $ibeg_next_next = $ri_first->[ $line + 2 ];
if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
&& $nesting_depth_to_go[$ibeg_next] eq
$nesting_depth_to_go[$ibeg_next_next] )
{
$logical_continuation_lines++;
}
}
# see if leading types match
my $types_match = $types_to_go[$inext_next] eq $type;
my $matches_without_bang;
# if first line has leading ! then compare the following token
if ( !$types_match && $type eq '!' ) {
$types_match = $matches_without_bang =
$types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
}
if (
# either we have multiple continuation lines to follow
# and we are not padding the first token
(
$logical_continuation_lines > 1
&& ( $ipad > 0 || $is_short_block )
)
# or..
|| (
# types must match
$types_match
# and keywords must match if keyword
&& !(
$type eq 'k'
&& $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
)
)
)
{
#----------------------begin special checks--------------
#
# SPECIAL CHECK 1:
# A check is needed before we can make the pad.
# If we are in a list with some long items, we want each
# item to stand out. So in the following example, the
# first line beginning with '$casefold->' would look good
# padded to align with the next line, but then it
# would be indented more than the last line, so we
# won't do it.
#
# ok(
# $casefold->{code} eq '0041'
# && $casefold->{status} eq 'C'
# && $casefold->{mapping} eq '0061',
# 'casefold 0x41'
# );
#
# Note:
# It would be faster, and almost as good, to use a comma
# count, and not pad if comma_count > 1 and the previous
# line did not end with a comma.
#
my $ok_to_pad = 1;
my $ibg = $ri_first->[ $line + 1 ];
my $depth = $nesting_depth_to_go[ $ibg + 1 ];
# just use simplified formula for leading spaces to avoid
# needless sub calls
my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
# look at each line beyond the next ..
my $l = $line + 1;
foreach my $ltest ( $line + 2 .. $max_line ) {
$l = $ltest;
my $ibeg_t = $ri_first->[$l];
# quit looking at the end of this container
last
if ( $nesting_depth_to_go[ $ibeg_t + 1 ] < $depth )
|| ( $nesting_depth_to_go[$ibeg_t] < $depth );
# cannot do the pad if a later line would be
# outdented more
if ( $levels_to_go[$ibeg_t] + $ci_levels_to_go[$ibeg_t] <
$lsp )
{
$ok_to_pad = 0;
last;
}
}
# don't pad if we end in a broken list
if ( $l == $max_line ) {
my $i2 = $ri_last->[$l];
if ( $types_to_go[$i2] eq '#' ) {
my $i1 = $ri_first->[$l];
next if terminal_type_i( $i1, $i2 ) eq ',';
}
}
# SPECIAL CHECK 2:
# a minus may introduce a quoted variable, and we will
# add the pad only if this line begins with a bare word,
# such as for the word 'Button' here:
# [
# Button => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
# -accelerator => "Meta+$_"
# ];
#
# On the other hand, if 'Button' is quoted, it looks best
# not to pad:
# [
# 'Button' => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
# -accelerator => "Meta+$_"
# ];
if ( $types_to_go[$ibeg_next] eq 'm' ) {
$ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
}
next unless $ok_to_pad;
#----------------------end special check---------------
my $length_1 = total_line_length( $ibeg, $ipad - 1 );
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
# If the first line has a leading ! and the second does
# not, then remove one space to try to align the next
# leading characters, which are often the same. For example:
# if ( !$ts
# || $ts == $self->Holder
# || $self->Holder->Type eq "Arena" )
#
# This usually helps readability, but if there are subsequent
# ! operators things will still get messed up. For example:
#
# if ( !exists $Net::DNS::typesbyname{$qtype}
# && exists $Net::DNS::classesbyname{$qtype}
# && !exists $Net::DNS::classesbyname{$qclass}
# && exists $Net::DNS::typesbyname{$qclass} )
# We can't fix that.
if ($matches_without_bang) { $pad_spaces-- }
# make sure this won't change if -lp is used
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1)
&& $indentation_1->get_recoverable_spaces() == 0 )
{
my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
if ( ref($indentation_2)
&& $indentation_2->get_recoverable_spaces() != 0 )
{
$pad_spaces = 0;
}
}
# we might be able to handle a pad of -1 by removing a blank
# token.
if ( $pad_spaces < 0 ) {
# Deactivated for -kpit due to conflict. This block deletes
# a space in an attempt to improve alignment in some cases,
# but it may conflict with user spacing requests. For now
# it is just deactivated if the -kpit option is used.
if ( $pad_spaces == -1 ) {
if (
$ipad > $ibeg
&& $types_to_go[ $ipad - 1 ] eq 'b'
&& !%keyword_paren_inner_tightness
# additional tests added for c385:
&& (
$types_to_go[$inext_next] eq $types_to_go[$ipad]
|| (
$types_to_go[$ipad] eq '!'
&& ( $types_to_go[ $ipad + 1 ] eq
$types_to_go[$inext_next] )
)
)
)
{
$self->pad_token( $ipad - 1, $pad_spaces );
}
}
$pad_spaces = 0;
}
# now apply any padding for alignment
if ( $ipad >= 0 && $pad_spaces ) {
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <=
$maximum_line_length_at_level[ $levels_to_go[$ibeg] ] )
{
$self->pad_token( $ipad, $pad_spaces );
}
}
}
}
continue {
$iendm = $iend;
$ibegm = $ibeg;
$has_leading_op = $has_leading_op_next;
} ## end of loop over lines
return;
} ## end sub set_logical_padding
} ## end closure set_logical_padding
sub pad_broken_list {
my ( $self, $ibeg, $iend, $ibeg_next, $iend_next ) = @_;
# Given:
# $ibeg, $iend = index range of line to get padding
# $ibeg_next, $iend_next = index range of next line
# This fixes a minor issue discussed in git134. In the example shown
# below, the is a broken list because of the q term, so line breaks
# are copied from the input. We want to insert padding at
# '[ $clientKey,' to align with the next line.
# $q->do(
# q{
# Something
# },
# [ $clientKey, ## <-- pad spaces needed here
# $systemKey,
# ],
# );
# Notation for the line being padded:
#
# [ $clientKey,
# | | |
# | | ------- $iend
# | ------ $ibeg+2
# ---- $ibeg
# NOTES:
# - This particular list is broken because of the 'q' term in the list
# - It is extremely rare for this routine to be called for typical code
# (I found just two examples in my large collection of test scripts)
# - This routine is not called for the last line of a batch. This
# is not necessary because perltidy will generally put a break
# after the opening token in that case.
# The basic logic is to pad the first blank space $ibeg+1 using the
# leading spaces that would have been given to token at $ibeg+2 if:
# - this line begins with an opening token which is
# - followed by additional tokens on the same line,
# - and is a list container, and
# - the line terminates in a comma whose parent is this container,
# - then pad using the indentation of the second token
# So in other words, we are simulating doing a line break after the
# first token and then recombining with a -vt operation. That cannot
# actually happen for a broken list.
# Next token must be blank for padding, and must be followed
# by at least one token and comma
return if ( $iend < $ibeg + 3 || $types_to_go[ $ibeg + 1 ] ne 'b' );
# This is only for lists
my $seqno = $type_sequence_to_go[$ibeg];
return if ( !$seqno );
my $is_list = $self->[_ris_list_by_seqno_]->{$seqno};
return if ( !$is_list );
# First token on next line must be in same container
my $seqno_beg_next = $parent_seqno_to_go[$ibeg_next];
return if ( !$seqno_beg_next || $seqno_beg_next != $seqno );
# This does not work well if the closing token is on the next line
return
if ( !defined( $mate_index_to_go[$ibeg] )
|| $mate_index_to_go[$ibeg] <= $iend_next );
# Line must end in a comma, with possible side comment
my $i_terminal = $iend;
if ( $types_to_go[$i_terminal] eq '#' ) {
$i_terminal -= 1;
if ( $types_to_go[$i_terminal] eq 'b' ) {
$i_terminal -= 1;
}
}
return if ( $i_terminal < $ibeg + 2 );
return if ( $types_to_go[$i_terminal] ne ',' );
# add padding to make the second token have the same location
# as if it had been output separately and later joined with -vt
my $lsp = $leading_spaces_to_go[$ibeg];
my $lsp_next = $leading_spaces_to_go[$ibeg_next];
# this is not for -lp style
return if ( ref($lsp) || ref($lsp_next) );
my $pad_spaces =
$lsp_next -
( $lsp + $token_lengths_to_go[$ibeg] +
$token_lengths_to_go[ $ibeg + 1 ] );
return if ( $pad_spaces <= 0 );
# Do not pad if it will cause excess line length
my $excess = $self->excess_line_length( $ibeg, $iend );
return if ( $excess + $pad_spaces > 0 );
$self->pad_token( $ibeg + 1, $pad_spaces );
return;
} ## end sub pad_broken_list
sub pad_token {
# insert $pad_spaces before token number $ipad
my ( $self, $ipad, $pad_spaces ) = @_;
my $rLL = $self->[_rLL_];
my $KK = $K_to_go[$ipad];
my $tok = $rLL->[$KK]->[_TOKEN_];
my $tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_];
if ( $pad_spaces > 0 ) {
$tok = SPACE x $pad_spaces . $tok;
$tok_len += $pad_spaces;
}
elsif ( $pad_spaces == 0 ) {
return;
}
elsif ( $pad_spaces == -1 && $tokens_to_go[$ipad] eq SPACE ) {
$tok = EMPTY_STRING;
$tok_len = 0;
}
else {
# shouldn't happen
DEVEL_MODE
&& Fault("unexpected request for pad spaces = $pad_spaces\n");
return;
}
$tok = $rLL->[$KK]->[_TOKEN_] = $tok;
$tok_len = $rLL->[$KK]->[_TOKEN_LENGTH_] = $tok_len;
$token_lengths_to_go[$ipad] += $pad_spaces;
$tokens_to_go[$ipad] = $tok;
foreach my $i ( $ipad .. $max_index_to_go ) {
$summed_lengths_to_go[ $i + 1 ] += $pad_spaces;
}
return;
} ## end sub pad_token
sub xlp_tweak {
my ($self) = @_;
# Remove one indentation space from unbroken containers marked with
# 'K_extra_space'. These are mostly two-line lists with short names
# formatted with -xlp -pt=2.
#
# Before this fix (extra space in line 2):
# is($module->VERSION, $expected,
# "$main_module->VERSION matches $module->VERSION ($expected)");
#
# After this fix:
# is($module->VERSION, $expected,
# "$main_module->VERSION matches $module->VERSION ($expected)");
#
# Notes:
# - This fixes issue git #106
# - This must be called after 'set_logical_padding'.
# - This is currently only applied to -xlp. It would also work for -lp
# but that style is essentially frozen.
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# Must be 2 or more lines
return if ( @{$ri_first} <= 1 );
# Pull indentation object from start of second line
my $ibeg_1 = $ri_first->[1];
my $lp_object = $leading_spaces_to_go[$ibeg_1];
return if ( !ref($lp_object) );
# This only applies to an indentation object with a marked token
my $K_extra_space = $lp_object->get_K_extra_space();
return unless ($K_extra_space);
# Look for the marked token within the first line of this batch
my $ibeg_0 = $ri_first->[0];
my $iend_0 = $ri_last->[0];
my $ii = $ibeg_0 + $K_extra_space - $K_to_go[$ibeg_0];
return if ( $ii <= $ibeg_0 || $ii > $iend_0 );
# Skip padded tokens, they have already been aligned
my $tok = $tokens_to_go[$ii];
return if ( substr( $tok, 0, 1 ) eq SPACE );
# Skip 'if'-like statements, this does not improve them
return
if ( $types_to_go[$ibeg_0] eq 'k'
&& $is_if_unless_elsif{ $tokens_to_go[$ibeg_0] } );
# Looks okay, reduce indentation by 1 space if possible
my $spaces = $lp_object->get_spaces();
if ( $spaces > 0 ) {
$lp_object->decrease_SPACES(1);
}
return;
} ## end sub xlp_tweak
{ ## begin closure make_alignment_patterns
my %keyword_map;
my %operator_map;
my %is_k_w_n_C_bang;
my %is_my_local_our;
my %is_use_like;
my %is_binary_type;
my %is_binary_keyword;
my %name_map;
BEGIN {
# Note: %block_type_map is now global to enable the -gal=s option
# Map certain keywords to the same 'if' class to align
# long if/elsif sequences. [elsif.pl]. But note that this is
# only for purposes of making the patterns, not alignment tokens.
# The only possible equivalent alignment tokens are 'if' and 'unless',
# and this is handled earlier under control of $rOpts_valign_if_unless
# to avoid making this a global hash.
%keyword_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'given',
'default' => 'given',
'case' => 'switch',
# treat an 'undef' similar to numbers and quotes
'undef' => 'Q',
);
# Map certain operators to the same class for alignment.
# Note that this map is for the alignment tokens, not the patterns.
# We could have placed 'unless' => 'if' here, but since that is
# under control of $rOpts_valign_if_unless, it is handled elsewhere.
%operator_map = (
'!~' => '=~',
'+=' => '+=',
'-=' => '+=',
'*=' => '+=',
'/=' => '+=',
);
%is_k_w_n_C_bang = (
'k' => 1,
'w' => 1,
'n' => 1,
'C' => 1,
'!' => 1,
);
# leading keywords which to skip for efficiency when making parenless
# container names
my @q = qw( my local our return );
@is_my_local_our{@q} = (1) x scalar(@q);
# leading keywords where we should just join one token to form
# parenless name
@q = qw( use );
@is_use_like{@q} = (1) x scalar(@q);
# token types which prevent using leading word as a container name
@q = qw{
x / : % . | ^ < = > || >= != *= => !~ == && |= .= -= =~ += <=
%= ^= x= ~~ ** << /= &= // >> ~. &. |. ^.
**= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
};
push @q, ',';
@is_binary_type{@q} = (1) x scalar(@q);
# token keywords which prevent using leading word as a container name
@q = qw( and or err eq ne cmp );
@is_binary_keyword{@q} = (1) x scalar(@q);
# Some common function calls whose args can be aligned. These do not
# give good alignments if the lengths differ significantly.
%name_map = (
'unlike' => 'like',
'isnt' => 'is',
##'is_deeply' => 'is', # poor; names lengths too different
);
} ## end BEGIN
sub make_alignment_patterns {
my (
$self,
$ibeg,
$iend,
$ralignment_type_to_go,
$alignment_count,
) = @_;
#------------------------------------------------------------------
# This sub creates arrays of vertical alignment info for one output
# line.
#------------------------------------------------------------------
# Input parameters:
# $ibeg, $iend - index range of this line in the _to_go arrays
# $ralignment_type_to_go - alignment type of tokens, like '=', if any
# $alignment_count - number of alignment tokens in the line
# The arrays which are created contain strings that can be tested by
# the vertical aligner to see if consecutive lines can be aligned
# vertically.
#
# The four arrays are indexed on the vertical
# alignment fields and are:
# @tokens - a list of any vertical alignment tokens for this line.
# These are tokens, such as '=' '&&' '#' etc which
# we want to might align vertically. These are
# decorated with various information such as
# nesting depth to prevent unwanted vertical
# alignment matches.
# @fields - the actual text of the line between the vertical alignment
# tokens.
# @patterns - a modified list of token types, one for each alignment
# field. These should normally each match before alignment is
# allowed, even when the alignment tokens match.
# @field_lengths - the display width of each field
# -------------------------------------
# Shortcut for lines without alignments
# -------------------------------------
if ( !$alignment_count ) {
my $rtokens = [];
my $rfield_lengths =
[ $summed_lengths_to_go[ $iend + 1 ] -
$summed_lengths_to_go[$ibeg] ];
my $rpatterns;
my $rfields;
if ( $ibeg == $iend ) {
$rfields = [ $tokens_to_go[$ibeg] ];
$rpatterns = [ $types_to_go[$ibeg] ];
}
else {
$rfields =
[ join( EMPTY_STRING, @tokens_to_go[ $ibeg .. $iend ] ) ];
$rpatterns =
[ join( EMPTY_STRING, @types_to_go[ $ibeg .. $iend ] ) ];
}
return [ $rtokens, $rfields, $rpatterns, $rfield_lengths ];
}
my $i_start = $ibeg;
my $depth = 0;
my $i_depth_prev = $i_start;
my $depth_prev = $depth;
my %container_name = ( 0 => EMPTY_STRING );
my $saw_exclamation_mark = 0;
my @tokens = ();
my @fields = ();
my @patterns = ();
my @field_lengths = ();
#-------------------------------------------------------------
# Make a container name for any uncontained commas, issue c089
#-------------------------------------------------------------
# This is a generalization of the fix for rt136416 which was a
# specialized patch just for 'use Module' statements.
# We restrict this to semicolon-terminated statements; that way
# we know that the top level commas are not in a list container.
if ( $ibeg == 0 && $iend == $max_index_to_go ) {
my $iterm = $max_index_to_go;
if ( $types_to_go[$iterm] eq '#' ) {
$iterm = iprev_to_go($iterm);
}
# Alignment lines ending like '=> sub {'; fixes issue c093
my $term_type_ok = $types_to_go[$iterm] eq ';';
$term_type_ok ||=
$tokens_to_go[$iterm] eq '{' && $block_type_to_go[$iterm];
if ( $iterm > $ibeg
&& $term_type_ok
&& !$is_my_local_our{ $tokens_to_go[$ibeg] }
&& $levels_to_go[$ibeg] eq $levels_to_go[$iterm] )
{
$container_name{'0'} =
make_uncontained_comma_name( $ibeg, $iterm );
}
}
#--------------------------------
# Begin main loop over all tokens
#--------------------------------
my $j = 0; # field index
$patterns[0] = EMPTY_STRING;
my %token_count;
my $type;
for my $i ( $ibeg .. $iend ) {
# handle a blank space
if ( ( $type = $types_to_go[$i] ) eq 'b' ) {
$patterns[$j] .= $type;
next;
}
#-------------------------------------------------------------
# Part 1: keep track of containers balanced on this line only.
#-------------------------------------------------------------
# These are used below to prevent unwanted cross-line alignments.
# Unbalanced containers already avoid aligning across
# container boundaries.
if ( $type_sequence_to_go[$i] ) {
my $token = $tokens_to_go[$i];
if ( $is_opening_token{$token} ) {
# if container is balanced on this line...
my $i_mate = $mate_index_to_go[$i];
if ( !defined($i_mate) ) { $i_mate = -1 }
if ( $i_mate > $i && $i_mate <= $iend ) {
$i_depth_prev = $i;
$depth_prev = $depth;
$depth++;
# Append the previous token name to make the container name
# more unique. This name will also be given to any commas
# within this container, and it helps avoid undesirable
# alignments of different types of containers.
# Containers beginning with { and [ are given those names
# for uniqueness. That way commas in different containers
# will not match. Here is an example of what this prevents:
# a => [ 1, 2, 3 ],
# b => { b1 => 4, b2 => 5 },
# Here is another example of what we avoid by labeling the
# commas properly:
# is_d( [ $a, $a ], [ $b, $c ] );
# is_d( { foo => $a, bar => $a }, { foo => $b, bar => $c } );
# is_d( [ \$a, \$a ], [ \$b, \$c ] );
my $name =
$token eq '(' ? $self->make_paren_name($i) : $token;
# name cannot be '.', so change to something else if so
if ( $name eq '.' ) { $name = 'dot' }
$container_name{$depth} = "+" . $name;
# Make the container name even more unique if necessary.
# If we are not vertically aligning this opening paren,
# append a character count to avoid bad alignment since
# it usually looks bad to align commas within containers
# for which the opening parens do not align. Here
# is an example very BAD alignment of commas (because
# the atan2 functions are not all aligned):
# $XY =
# $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
# $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
# $X * atan2( $X, 1 ) -
# $Y * atan2( $Y, 1 );
#
# On the other hand, it is usually okay to align commas
# if opening parens align, such as:
# glVertex3d( $cx + $s * $xs, $cy, $z );
# glVertex3d( $cx, $cy + $s * $ys, $z );
# glVertex3d( $cx - $s * $xs, $cy, $z );
# glVertex3d( $cx, $cy - $s * $ys, $z );
#
# To distinguish between these situations, we append
# the length of the line from the previous matching
# token, or beginning of line, to the function name.
# This will allow the vertical aligner to reject
# undesirable matches.
# if we are not aligning on this paren...
if ( !$ralignment_type_to_go->[$i] ) {
# Add the length to the name ...
my $len = $summed_lengths_to_go[$i] -
$summed_lengths_to_go[$i_start];
# Do not include the length of any '!'. Otherwise,
# commas in the following line will not match:
# ok( 20, tapprox( ( pdl 2, 3 ), ( pdl 2, 3 ) ) );
# ok( 21, !tapprox( ( pdl 2, 3 ), ( pdl 2, 4 ) ) );
if ($saw_exclamation_mark) { $len -= 1 }
# For first token, use distance from start of line
# but subtract off the indentation due to level.
# Otherwise, results could vary with indentation.
if ( $i_start == $ibeg ) {
$len +=
leading_spaces_to_go($ibeg) -
$levels_to_go[$i_start] *
$rOpts_indent_columns;
}
if ( $len < 0 ) { $len = 0 }
# tack this length onto the container name to try
# to make a unique token name
$container_name{$depth} .= "-" . $len;
} ## end if ( !$ralignment_type_to_go...)
} ## end if ( $i_mate > $i && $i_mate...)
} ## end if ( $is_opening_token...)
elsif ( $is_closing_token{$token} ) {
$i_depth_prev = $i;
$depth_prev = $depth;
$depth-- if $depth > 0;
}
else {
## must be ternary
}
} ## end if ( $type_sequence_to_go...)
#------------------------------------------------------------
# Part 2: if we find a new synchronization token, we are done
# with a field
#------------------------------------------------------------
if ( $i > $i_start && $ralignment_type_to_go->[$i] ) {
my $tok = my $raw_tok = $ralignment_type_to_go->[$i];
# map similar items
my $tok_map = $operator_map{$tok};
$tok = $tok_map if ($tok_map);
# make separators in different nesting depths unique
# by appending the nesting depth digit.
if ( $raw_tok ne '#' ) {
$tok .= "$nesting_depth_to_go[$i]";
}
# also decorate commas with any container name to avoid
# unwanted cross-line alignments.
if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
# If we are at an opening token which increased depth, we have
# to use the name from the previous depth.
my $depth_last = $i == $i_depth_prev ? $depth_prev : $depth;
my $depth_p =
( $depth_last < $depth ? $depth_last : $depth );
if ( $container_name{$depth_p} ) {
$tok .= $container_name{$depth_p};
}
}
# Patch to avoid aligning leading and trailing if, unless.
# Mark trailing if, unless statements with container names.
# This makes them different from leading if, unless which
# are not so marked at present. If we ever need to name
# them too, we could use ci to distinguish them.
# Example problem to avoid:
# return ( 2, "DBERROR" )
# if ( $retval == 2 );
# if ( scalar @_ ) {
# my ( $a, $b, $c, $d, $e, $f ) = @_;
# }
if ( $raw_tok eq '(' ) {
if ( $ci_levels_to_go[$ibeg]
&& $container_name{$depth} =~ /^\+(if|unless)/ )
{
$tok .= $container_name{$depth};
}
}
# Decorate block braces with block types to avoid
# unwanted alignments such as the following:
# foreach ( @{$routput_array} ) { $fh->print($_) }
# eval { $fh->close() };
if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
my $block_type = $block_type_to_go[$i];
# map certain related block types to allow
# else blocks to align
$block_type = $block_type_map{$block_type}
if ( defined( $block_type_map{$block_type} ) );
# remove sub names to allow one-line sub braces to align
# regardless of name
if ( $block_type =~ /$SUB_PATTERN/ ) { $block_type = 'sub' }
# allow all control-type blocks to align
if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
$tok .= $block_type;
# Avoid aligning opening braces across leading ci level
# changes by marking block type with _ci (issue c224)
if ( $ci_levels_to_go[$ibeg] ) { $tok .= '_1' }
}
# Mark multiple copies of certain tokens with a copy number.
# This will allow the aligner to decide if they are matched.
# For example, the two equals in the example below will be
# labeled '=0' and '=0.2'. Later, the '=0.2' will be ignored
# in alignment because it has no match.
# $| = $debug = 1 if $opt_d;
# $full_index = 1 if $opt_i;
if ( $raw_tok eq '=' || $raw_tok eq '=>' ) {
$token_count{$tok}++;
if ( $token_count{$tok} > 1 ) {
$tok .= '.' . $token_count{$tok};
}
}
# concatenate the text of the consecutive tokens to form
# the field
push( @fields,
join( EMPTY_STRING, @tokens_to_go[ $i_start .. $i - 1 ] ) );
push @field_lengths,
$summed_lengths_to_go[$i] - $summed_lengths_to_go[$i_start];
# store the alignment token for this field
push( @tokens, $tok );
# get ready for the next batch
$i_start = $i;
$saw_exclamation_mark = 0;
$j++;
$patterns[$j] = EMPTY_STRING;
} ## end if ( new synchronization token
#-----------------------------------------------
# Part 3: continue accumulating the next pattern
#-----------------------------------------------
if ( $is_k_w_n_C_bang{$type} ) {
# for keywords we have to use the actual text
if ( $type eq 'k' ) {
my $tok_fix = $tokens_to_go[$i];
# but map certain keywords to a common string to allow
# alignment.
$tok_fix = $keyword_map{$tok_fix}
if ( defined( $keyword_map{$tok_fix} ) );
# VSN PATCH: all 'n' chars in a pattern must be for token
# type 'n' (number). i.e. convert 'print' to 'priNt'
$tok_fix =~ tr/n/N/;
$patterns[$j] .= $tok_fix;
}
# ignore any ! in patterns
elsif ( $type eq '!' ) {
$saw_exclamation_mark = 1;
}
# Handle $type =~ /^[wnC]$/...
# Mark most things before arrows as a quote to
# get them to line up. Testfile: mixed.pl.
else {
my $type_fix = $type;
if ( $i < $iend - 1 ) {
my $next_type = $types_to_go[ $i + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type_fix = 'Q';
# Patch to ignore leading minus before words,
# by changing pattern 'mQ' into just 'Q',
# so that we can align things like this:
# Button => "Print letter \"~$_\"",
# -command => [ sub { print "$_[0]\n" }, $_ ],
if ( $patterns[$j] eq 'm' ) {
$patterns[$j] = EMPTY_STRING;
}
}
}
# Convert a bareword within braces into a quote for
# matching. This will allow alignment of expressions like
# this:
# local ( $SIG{'INT'} ) = IGNORE;
# local ( $SIG{ALRM} ) = 'POSTMAN';
if ( $type eq 'w'
&& $i > $ibeg
&& $i < $iend
&& $types_to_go[ $i - 1 ] eq 'L'
&& $types_to_go[ $i + 1 ] eq 'R' )
{
$type_fix = 'Q';
}
# VSN PATCH: no longer changing 'n' to 'Q' here; this
# will be handled in the vertical aligner and allow
# the aligner to find numbers more efficiently.
##: if ( $type eq 'n' ) { $type_fix = 'Q' }
$patterns[$j] .= $type_fix;
}
} ## end elsif ( $is_k_w_n_C{$type} )
# everything else
else {
$patterns[$j] .= $type;
# remove any zero-level name at first fat comma
if ( $depth == 0 && $type eq '=>' ) {
$container_name{$depth} = EMPTY_STRING;
}
}
} ## end for my $i ( $ibeg .. $iend)
#---------------------------------------------------------------
# End of main loop .. join text of tokens to make the last field
#---------------------------------------------------------------
push( @fields,
join( EMPTY_STRING, @tokens_to_go[ $i_start .. $iend ] ) );
push @field_lengths,
$summed_lengths_to_go[ $iend + 1 ] - $summed_lengths_to_go[$i_start];
return [ \@tokens, \@fields, \@patterns, \@field_lengths ];
} ## end sub make_alignment_patterns
sub make_uncontained_comma_name {
my ( $ibeg, $iterm ) = @_;
# Given:
# $ibeg = first index
# $iterm = last index
# Make a container name by combining all leading barewords,
# keywords and functions.
my $name = EMPTY_STRING;
my $count = 0;
my $count_max;
my $iname_end;
my $ilast_blank;
for my $ii ( $ibeg .. $iterm ) {
my $type = $types_to_go[$ii];
if ( $type eq 'b' ) {
$ilast_blank = $ii;
next;
}
my $token = $tokens_to_go[$ii];
# Give up if we find an opening paren, binary operator or
# comma within or after the proposed container name.
if ( $token eq '('
|| $is_binary_type{$type}
|| $type eq 'k' && $is_binary_keyword{$token} )
{
$name = EMPTY_STRING;
last;
}
# The container name is only built of certain types:
# 'k'=builtin keyword, 'U'=user defined sub, 'w'=unknown bareword
last if ( !$is_kwU{$type} );
# Normally it is made of one word, but two words for 'use'
if ( $count == 0 ) {
if ( $type eq 'k'
&& $is_use_like{ $tokens_to_go[$ii] } )
{
$count_max = 2;
}
else {
$count_max = 1;
}
}
elsif ( defined($count_max) && $count >= $count_max ) {
last;
}
else {
## continue
}
if ( defined( $name_map{$token} ) ) {
$token = $name_map{$token};
}
$name .= SPACE . $token;
$iname_end = $ii;
$count++;
}
# Require a space after the container name token(s)
if ( $name
&& defined($ilast_blank)
&& $ilast_blank > $iname_end )
{
$name = substr( $name, 1 );
}
return $name;
} ## end sub make_uncontained_comma_name
} ## end closure make_alignment_patterns
sub make_paren_name {
my ( $self, $i ) = @_;
# The token at index $i is a '('.
# Create an alignment name for it to avoid incorrect alignments.
# Start with the name of the previous nonblank token...
my $name = EMPTY_STRING;
my $im = $i - 1;
return EMPTY_STRING if ( $im < 0 );
if ( $types_to_go[$im] eq 'b' ) { $im--; }
return EMPTY_STRING if ( $im < 0 );
$name = $tokens_to_go[$im];
# Prepend any sub name to an isolated -> to avoid unwanted alignments
# [test case is test8/penco.pl]
if ( $name eq '->' ) {
$im--;
if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
$name = $tokens_to_go[$im] . $name;
}
}
# Finally, remove any leading arrows
if ( substr( $name, 0, 2 ) eq '->' ) {
$name = substr( $name, 2 );
}
return $name;
} ## end sub make_paren_name
{ ## begin closure get_final_indentation
my ( $last_indentation_written, $last_unadjusted_indentation,
$last_leading_token );
sub initialize_get_final_indentation {
$last_indentation_written = 0;
$last_unadjusted_indentation = 0;
$last_leading_token = EMPTY_STRING;
return;
} ## end sub initialize_get_final_indentation
sub get_final_indentation {
my (
$self,
$ibeg,
$iend,
$rindentation_list,
$level_jump,
) = @_;
#--------------------------------------------------------------
# This routine makes any necessary adjustments to get the final
# indentation of a line in the Formatter.
#--------------------------------------------------------------
# Given:
# ($ibeg, $iend) = index range of tokens on this line
# $rindentation_list = ref to indentation of each line in this batch,
# to be updated by this sub
# $level_jump = level change to $token $ibeg from previous token
# It starts with the basic indentation which has been defined for the
# leading token, and then takes into account any options that the user
# has set regarding special indenting and outdenting.
# This routine has to resolve a number of complex interacting issues,
# including:
# 1. The various -cti=n type flags, which contain the desired change in
# indentation for lines ending in commas and semicolons, should be
# followed,
# 2. qw quotes require special processing and do not fit perfectly
# with normal containers,
# 3. formatting with -wn can complicate things, especially with qw
# quotes,
# 4. formatting with the -lp option is complicated, and does not
# work well with qw quotes and with -wn formatting.
# 5. a number of special situations, such as 'cuddled' formatting.
# 6. This routine is mainly concerned with outdenting closing tokens
# but note that there is some overlap with the functions of sub
# undo_ci, which was processed earlier, so care has to be taken to
# keep them coordinated.
my $this_batch = $self->[_this_batch_];
# Find the last code token of this line
my $i_terminal = $iend;
my $terminal_type = $types_to_go[$iend];
if ( $terminal_type eq '#' && $i_terminal > $ibeg ) {
$i_terminal -= 1;
$terminal_type = $types_to_go[$i_terminal];
if ( $terminal_type eq 'b' && $i_terminal > $ibeg ) {
$i_terminal -= 1;
$terminal_type = $types_to_go[$i_terminal];
}
}
my $is_outdented_line;
my $type_beg = $types_to_go[$ibeg];
my $token_beg = $tokens_to_go[$ibeg];
my $level_beg = $levels_to_go[$ibeg];
my $block_type_beg = $block_type_to_go[$ibeg];
my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
my $seqno_beg = $type_sequence_to_go[$ibeg];
my $is_closing_type_beg = $is_closing_type{$type_beg};
# QW INDENTATION PATCH 3:
my $seqno_qw_closing;
if ( $type_beg eq 'q' && $ibeg == 0 ) {
my $KK = $K_to_go[$ibeg];
$seqno_qw_closing =
$self->[_rending_multiline_qw_seqno_by_K_]->{$KK};
}
my $is_semicolon_terminated = $terminal_type eq ';'
&& ( $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg]
|| $seqno_qw_closing );
# NOTE: A future improvement would be to make it semicolon terminated
# even if it does not have a semicolon but is followed by a closing
# block brace. This would undo ci even for something like the
# following, in which the final paren does not have a semicolon because
# it is a possible weld location:
# if ($BOLD_MATH) {
# (
# $labels, $comment,
# join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
# )
# }
#
#---------------------------------------------------------
# Section 1: set a flag and a default indentation
#
# Most lines are indented according to the initial token.
# But it is common to outdent to the level just after the
# terminal token in certain cases...
# adjust_indentation flag:
# 0 - do not adjust
# 1 - outdent
# 2 - vertically align with opening token
# 3 - indent
#---------------------------------------------------------
my $adjust_indentation = 0;
my $default_adjust_indentation = 0;
# Parameters needed for option 2, aligning with opening token:
my (
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
);
#-------------------------------------
# Section 1A:
# if line starts with a sequenced item
#-------------------------------------
if ( $seqno_beg || $seqno_qw_closing ) {
# This can be tedious so we let a sub do it
(
$adjust_indentation,
$default_adjust_indentation,
$opening_indentation,
$opening_offset,
$is_leading,
$opening_exists,
) = $self->get_closing_token_indentation(
$ibeg,
$iend,
$rindentation_list,
$level_jump,
$i_terminal,
$is_semicolon_terminated,
$seqno_qw_closing,
);
}
#-----------------------------------------
# Section 1B:
# if line starts with a non-sequenced item
#-----------------------------------------
else {
if ( $type_beg eq ';' && !$rOpts_indent_leading_semicolon ) {
$adjust_indentation = 1;
}
}
#---------------------------------------------------------
# Section 2: set indentation according to flag set above
#
# Select the indentation object to define leading
# whitespace. If we are outdenting something like '} } );'
# then we want to use one level below the last token
# ($i_terminal) in order to get it to fully outdent through
# all levels.
#---------------------------------------------------------
my $indentation;
my $lev;
my $level_end = $levels_to_go[$iend];
#------------------------------------
# Section 2A: adjust_indentation == 0
# No change in indentation
#------------------------------------
if ( $adjust_indentation == 0 ) {
$indentation = $leading_spaces_beg;
$lev = $level_beg;
}
#-------------------------------------------------------------------
# Section 2B: adjust_indentation == 1
# Change the indentation to be that of a different token on the line
#-------------------------------------------------------------------
elsif ( $adjust_indentation == 1 ) {
# Previously, the indentation of the terminal token was used:
# OLD CODING:
# $indentation = $reduced_spaces_to_go[$i_terminal];
# $lev = $levels_to_go[$i_terminal];
# Generalization for MOJO patch:
# Use the lowest level indentation of the tokens on the line.
# For example, here we can use the indentation of the ending ';':
# } until ($selection > 0 and $selection < 10); # ok to use ';'
# But this will not outdent if we use the terminal indentation:
# )->then( sub { # use indentation of the ->, not the {
# Warning: reduced_spaces_to_go[] may be a reference, do not
# do numerical checks with it
my $i_ind = $ibeg;
$indentation = $reduced_spaces_to_go[$i_ind];
$lev = $levels_to_go[$i_ind];
while ( ++$i_ind <= $i_terminal ) {
if ( $levels_to_go[$i_ind] < $lev ) {
$indentation = $reduced_spaces_to_go[$i_ind];
$lev = $levels_to_go[$i_ind];
}
} ## end while ( ++$i_ind <= $i_terminal)
}
#--------------------------------------------------------------
# Section 2C: adjust_indentation == 2
# Handle indented closing token which aligns with opening token
#--------------------------------------------------------------
elsif ( $adjust_indentation == 2 ) {
# handle option to align closing token with opening token
$lev = $level_beg;
# calculate spaces needed to align with opening token
my $space_count =
get_spaces($opening_indentation) + $opening_offset;
# Indent less than the previous line.
#
# Problem: For -lp we don't exactly know what it was if there
# were recoverable spaces sent to the aligner. A good solution
# would be to force a flush of the vertical alignment buffer, so
# that we would know. For now, this rule is used for -lp:
#
# When the last line did not start with a closing token we will
# be optimistic that the aligner will recover everything wanted.
#
# This rule will prevent us from breaking a hierarchy of closing
# tokens, and in a worst case will leave a closing paren too far
# indented, but this is better than frequently leaving it not
# indented enough.
my $last_spaces = get_spaces($last_indentation_written);
if ( ref($last_indentation_written)
&& !$is_closing_token{$last_leading_token} )
{
$last_spaces +=
get_recoverable_spaces($last_indentation_written);
}
# reset the indentation to the new space count if it works
# only options are all or none: nothing in-between looks good
$lev = $level_beg;
my $diff = $last_spaces - $space_count;
if ( $diff > 0 ) {
$indentation = $space_count;
}
else {
# We need to fix things ... but there is no good way to do it.
# The best solution is for the user to use a longer maximum
# line length. We could get a smooth variation if we just move
# the paren in using
# $space_count -= ( 1 - $diff );
# But unfortunately this can give a rather unbalanced look.
# For -xlp we currently allow a tolerance of one indentation
# level and then revert to a simpler default. This will jump
# suddenly but keeps a balanced look.
if ( $rOpts_extended_line_up_parentheses
&& $diff >= -$rOpts_indent_columns
&& $space_count > $leading_spaces_beg )
{
$indentation = $space_count;
}
# Otherwise revert to defaults
elsif ( $default_adjust_indentation == 0 ) {
$indentation = $leading_spaces_beg;
}
elsif ( $default_adjust_indentation == 1 ) {
$indentation = $reduced_spaces_to_go[$i_terminal];
$lev = $levels_to_go[$i_terminal];
}
else {
# shouldn't happen - default_adjust_indentation is 0 or 1
DEVEL_MODE
&& Fault(
"default_indentation=$default_adjust_indentation expected to be 0 or 1\n"
);
# continue with 0 if not in DEVEL_MODE
$indentation = $leading_spaces_beg;
}
}
}
#-------------------------------------------------------------
# Section 2D: adjust_indentation == 3
# Full indentation of closing tokens (-icb and -icp or -cti=2)
#-------------------------------------------------------------
else {
# handle -icb (indented closing code block braces)
# Updated method for indented block braces: indent one full level if
# there is no continuation indentation. This will occur for major
# structures such as sub, if, else, but not for things like map
# blocks.
#
# Note: only code blocks without continuation indentation are
# handled here (if, else, unless, ..). In the following snippet,
# the terminal brace of the sort block will have continuation
# indentation as shown so it will not be handled by the coding
# here. We would have to undo the continuation indentation to do
# this, but it probably looks ok as is. This is a possible future
# update for semicolon terminated lines.
#
# if ($sortby eq 'date' or $sortby eq 'size') {
# @files = sort {
# $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
# or $a cmp $b
# } @files;
# }
#
if ( $block_type_beg
&& $ci_levels_to_go[$i_terminal] == 0 )
{
my $spaces = get_spaces( $leading_spaces_to_go[$i_terminal] );
$indentation = $spaces + $rOpts_indent_columns;
# NOTE: for -lp we could create a new indentation object, but
# there is probably no need to do it
}
# handle -icp and any -icb block braces which fall through above
# test such as the 'sort' block mentioned above.
else {
# There are currently two ways to handle -icp...
# One way is to use the indentation of the previous line:
# $indentation = $last_indentation_written;
# The other way is to use the indentation that the previous line
# would have had if it hadn't been adjusted:
$indentation = $last_unadjusted_indentation;
# Current method: use the minimum of the two. This avoids
# inconsistent indentation.
if ( get_spaces($last_indentation_written) <
get_spaces($indentation) )
{
$indentation = $last_indentation_written;
}
}
# use previous indentation but use own level
# to cause list to be flushed properly
$lev = $level_beg;
}
#-------------------------------------------------------------
# Remember indentation except for multi-line quotes, which get
# no indentation
#-------------------------------------------------------------
if ( !( $ibeg == 0 && $this_batch->[_starting_in_quote_] ) ) {
$last_indentation_written = $indentation;
$last_unadjusted_indentation = $leading_spaces_beg;
$last_leading_token = $token_beg;
# Patch to make a line which is the end of a qw quote work with the
# -lp option. Make $token_beg look like a closing token as some
# type even if it is not. This variable will become
# $last_leading_token at the end of this loop. Then, if the -lp
# style is selected, and the next line is also a
# closing token, it will not get more indentation than this line.
# We need to do this because qw quotes (at present) only get
# continuation indentation, not one level of indentation, so we
# need to turn off the -lp indentation.
# ... a picture is worth a thousand words:
# perltidy -wn -gnu (Without this patch):
# ok(defined(
# $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
# 2981014)])
# ));
# perltidy -wn -gnu (With this patch):
# ok(defined(
# $seqio = $gb->get_Stream_by_batch([qw(J00522 AF303112
# 2981014)])
# ));
if ( $seqno_qw_closing
&& ( length($token_beg) > 1 || $token_beg eq '>' ) )
{
$last_leading_token = ')';
}
}
#---------------------------------------------------------------------
# Rule: lines with leading closing tokens should not be outdented more
# than the line which contained the corresponding opening token.
#---------------------------------------------------------------------
if ( defined($opening_indentation) ) {
# MOJO patch: Set a flag if this lines begins with ')->'
my $leading_paren_arrow = (
$is_closing_type_beg
&& $token_beg eq ')'
&& (
(
$ibeg < $i_terminal
&& $types_to_go[ $ibeg + 1 ] eq '->'
)
|| ( $ibeg < $i_terminal - 1
&& $types_to_go[ $ibeg + 1 ] eq 'b'
&& $types_to_go[ $ibeg + 2 ] eq '->' )
)
);
# Updated per bug report in alex_bug.pl: we must not
# mess with the indentation of closing logical braces, so
# we must treat something like '} else {' as if it were
# an isolated brace
my $is_isolated_block_brace = $block_type_beg
&& (
$i_terminal == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_beg}
);
# only do this for a ':; which is aligned with its leading '?'
my $is_unaligned_colon = $type_beg eq ':' && !$is_leading;
if (
!$leading_paren_arrow
&& !$is_isolated_block_brace
&& !$is_unaligned_colon
&& ( get_spaces($opening_indentation) >
get_spaces($indentation) )
)
{
$indentation = $opening_indentation;
}
}
#----------------------------------------------------
# remember the indentation of each line of this batch
#----------------------------------------------------
push @{$rindentation_list}, $indentation;
#---------------------------------------------
# outdent lines with certain leading tokens...
#---------------------------------------------
if (
# must be first word of this batch
$ibeg == 0
# and ...
&& (
# certain leading keywords if requested
$rOpts_outdent_keywords
&& $type_beg eq 'k'
&& $outdent_keyword{$token_beg}
# or labels if requested
|| $rOpts_outdent_labels && $type_beg eq 'J'
# or static block comments if requested
|| $this_batch->[_is_static_block_comment_]
&& $rOpts_outdent_static_block_comments
)
)
{
my $space_count = leading_spaces_to_go($ibeg);
if ( $space_count > 0 ) {
$space_count -= $rOpts_continuation_indentation;
$is_outdented_line = 1;
if ( $space_count < 0 ) { $space_count = 0 }
# do not promote a spaced static block comment to non-spaced;
# this is not normally necessary but could be for some
# unusual user inputs (such as -ci = -i)
if ( $type_beg eq '#' && $space_count == 0 ) {
$space_count = 1;
}
$indentation = $space_count;
}
}
return (
$indentation,
$lev,
$level_end,
$i_terminal,
$is_outdented_line,
);
} ## end sub get_final_indentation
sub get_closing_token_indentation {
# Determine indentation adjustment for a line with a leading closing
# token - i.e. one of these: ) ] } :
# The indentation adjustment is found by checking all user controls,
# which are sometimes in conflict. So the logic is rather complex.
# Returns:
# Flags giving the indentation to use for this line:
# $adjust_indentation,
# 0 - do not adjust
# 1 - outdent
# 2 - vertically align with opening token
# 3 - indent
# $default_adjust_indentation
# a default in case $adjust_indentation cannot be used
#
# Also returns info about the indentation of the opening token,
# obtained from sub 'get_opening_indentation':
# $opening_indentation,
# $opening_offset,
# $is_leading,
# $opening_exists,
my (
$self,
$ibeg,
$iend,
$rindentation_list,
$level_jump,
$i_terminal,
$is_semicolon_terminated,
$seqno_qw_closing,
) = @_;
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
my $terminal_type = $types_to_go[$i_terminal];
my $type_beg = $types_to_go[$ibeg];
my $token_beg = $tokens_to_go[$ibeg];
my $level_beg = $levels_to_go[$ibeg];
my $block_type_beg = $block_type_to_go[$ibeg];
my $leading_spaces_beg = $leading_spaces_to_go[$ibeg];
my $seqno_beg = $type_sequence_to_go[$ibeg];
my $is_closing_type_beg = $is_closing_type{$type_beg};
# Return variables:
my (
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
);
# Honor any flag to reduce -ci set by the -bbxi=n option
if ( $seqno_beg && $self->[_rwant_reduced_ci_]->{$seqno_beg} ) {
# if this is an opening, it must be alone on the line ...
if ( $is_closing_type{$type_beg} || $ibeg == $i_terminal ) {
$adjust_indentation = 1;
}
# ... or a single welded unit (fix for b1173)
elsif ($total_weld_count) {
my $K_beg = $K_to_go[$ibeg];
my $Kterm = $K_to_go[$i_terminal];
my $Kterm_test = $self->[_rK_weld_left_]->{$Kterm};
if ( defined($Kterm_test) && $Kterm_test >= $K_beg ) {
$Kterm = $Kterm_test;
}
if ( $Kterm == $K_beg ) { $adjust_indentation = 1 }
}
else {
# no change in ci needed
}
}
my $ris_bli_container = $self->[_ris_bli_container_];
my $is_bli_beg = $seqno_beg ? $ris_bli_container->{$seqno_beg} : 0;
# Update the $is_bli flag as we go. It is initially 1.
# We note seeing a leading opening brace by setting it to 2.
# If we get to the closing brace without seeing the opening then we
# turn it off. This occurs if the opening brace did not get output
# at the start of a line, so we will then indent the closing brace
# in the default way.
if ( $is_bli_beg && $is_bli_beg == 1 ) {
my $K_opening_container = $self->[_K_opening_container_];
my $K_opening = $K_opening_container->{$seqno_beg};
my $K_beg = $K_to_go[$ibeg];
if ( $K_beg eq $K_opening ) {
$ris_bli_container->{$seqno_beg} = $is_bli_beg = 2;
}
else { $is_bli_beg = 0 }
}
# QW PATCH for the combination -lp -wn
# For -lp formatting use $ibeg_weld_fix to get around the problem
# that with -lp type formatting the opening and closing tokens to not
# have sequence numbers.
my $ibeg_weld_fix = $ibeg;
if ( $seqno_qw_closing && $total_weld_count ) {
my $i_plus = $inext_to_go[$ibeg];
if ( $i_plus <= $max_index_to_go ) {
my $K_plus = $K_to_go[$i_plus];
if ( defined( $self->[_rK_weld_left_]->{$K_plus} ) ) {
$ibeg_weld_fix = $i_plus;
}
}
}
# if we are at a closing token of some type..
if ( $is_closing_type_beg || $seqno_qw_closing ) {
my $K_beg = $K_to_go[$ibeg];
# get the indentation of the line containing the corresponding
# opening token
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation(
$ibeg_weld_fix,
$rindentation_list,
$seqno_qw_closing
);
# Patch for rt144979, part 1. Coordinated with part 2.
# Do not undo ci for a cuddled closing brace control; it
# needs to be treated exactly the same ci as an isolated
# closing brace.
my $is_cuddled_closing_brace = $seqno_beg
&& $self->[_ris_cuddled_closing_brace_]->{$seqno_beg};
# First set the default behavior:
if (
# default behavior is to outdent closing lines
# of the form: "); }; ]; )->xxx;"
$is_semicolon_terminated
# and 'cuddled parens' of the form: ")->pack(". Bug fix for RT
# #123749]: the TYPES here were incorrectly ')' and '('. The
# corrected TYPES are '}' and '{'. But skip a cuddled block.
|| (
$terminal_type eq '{'
&& $type_beg eq '}'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
&& !$is_cuddled_closing_brace
)
# remove continuation indentation for any line like
# } ... {
# or without ending '{' and unbalanced, such as
# such as '}->{$operator}'
|| (
$type_beg eq '}'
&& ( $types_to_go[$iend] eq '{'
|| $levels_to_go[$iend] < $level_beg )
# but not if a cuddled block
&& !$is_cuddled_closing_brace
)
# and when the next line is at a lower indentation level...
# PATCH #1: and only if the style allows undoing continuation
# for all closing token types. We should really wait until
# the indentation of the next line is known and then make
# a decision, but that would require another pass.
# PATCH #2: and not if this token is under -xci control
|| ( $level_jump < 0
&& !$some_closing_token_indentation
&& !$self->[_rseqno_controlling_my_ci_]->{$K_beg} )
# Patch for -wn=2, multiple welded closing tokens
|| ( $i_terminal > $ibeg
&& $is_closing_type{ $types_to_go[$iend] } )
# Alternate Patch for git #51, isolated closing qw token not
# outdented if no-delete-old-newlines is set. This works, but
# a more general patch elsewhere fixes the real problem: ljump.
# || ( $seqno_qw_closing && $ibeg == $i_terminal )
)
{
$adjust_indentation = 1;
}
# outdent something like '),'
if (
$terminal_type eq ','
# Removed this constraint for -wn
# OLD: allow just one character before the comma
# && $i_terminal == $ibeg + 1
# require LIST environment; otherwise, we may outdent too much -
# this can happen in calls without parentheses (overload.t);
&& $self->is_in_list_by_i($i_terminal)
)
{
$adjust_indentation = 1;
}
# undo continuation indentation of a terminal closing token if
# it is the last token before a level decrease. This will allow
# a closing token to line up with its opening counterpart, and
# avoids an indentation jump larger than 1 level.
my $rLL = $self->[_rLL_];
my $Klimit = $self->[_Klimit_];
if ( $i_terminal == $ibeg
&& $is_closing_type_beg
&& defined($K_beg)
&& $K_beg < $Klimit )
{
my $K_plus = $K_beg + 1;
my $type_plus = $rLL->[$K_plus]->[_TYPE_];
if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
$type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
}
if ( $type_plus eq '#' && $K_plus < $Klimit ) {
$type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
if ( $type_plus eq 'b' && $K_plus < $Klimit ) {
$type_plus = $rLL->[ ++$K_plus ]->[_TYPE_];
}
# Note: we have skipped past just one comment (perhaps a
# side comment). There could be more, and we could easily
# skip past all the rest with the following code, or with a
# while loop. It would be rare to have to do this, and
# those block comments would still be indented, so it would
# to leave them indented. So it seems best to just stop at
# a maximum of one comment.
##if ($type_plus eq '#') {
## $K_plus = $self->K_next_code($K_plus);
##}
}
if ( !$is_bli_beg && defined($K_plus) ) {
my $lev = $level_beg;
my $level_next = $rLL->[$K_plus]->[_LEVEL_];
# and do not undo ci if it was set by the -xci option
$adjust_indentation = 1
if ( $level_next < $lev
&& !$self->[_rseqno_controlling_my_ci_]->{$K_beg} );
}
# Patch for RT #96101, in which closing brace of anonymous subs
# was not outdented. We should look ahead and see if there is
# a level decrease at the next token (i.e., a closing token),
# but right now we do not have that information. For now
# we see if we are in a list, and this works well.
# See test files 'sub*.t' for good test cases.
if ( !$rOpts_indent_closing_brace
&& $block_type_beg
&& $self->[_ris_asub_block_]->{$seqno_beg}
&& $self->is_in_list_by_i($i_terminal) )
{
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg,
$rindentation_list, undef );
my $indentation = $leading_spaces_beg;
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
{
$adjust_indentation = 1;
}
}
}
# YVES patch 1 of 2:
# Undo ci of line with leading closing eval brace,
# but not beyond the indentation of the line with
# the opening brace.
if ( $block_type_beg
&& $block_type_beg eq 'eval'
&& !ref($leading_spaces_beg)
&& !$rOpts_indent_closing_brace )
{
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg, $rindentation_list,
undef );
my $indentation = $leading_spaces_beg;
if ( defined($opening_indentation)
&& get_spaces($indentation) >
get_spaces($opening_indentation) )
{
$adjust_indentation = 1;
}
}
# patch for issue git #40: -bli setting has priority
$adjust_indentation = 0 if ($is_bli_beg);
$default_adjust_indentation = $adjust_indentation;
# Now modify default behavior according to user request:
# handle option to indent non-blocks of the form ); }; ];
# But don't do special indentation to something like ')->pack('
if ( !$block_type_beg ) {
# Note that logical padding has already been applied, so we may
# need to remove some spaces to get a valid hash key.
my $tok = $token_beg;
my $cti = $closing_token_indentation{$tok};
# Fix the value of 'cti' for an isolated non-welded closing qw
# delimiter.
if ( $seqno_qw_closing && $ibeg_weld_fix == $ibeg ) {
# A quote delimiter which is not a container will not have
# a cti value defined. In this case use the style of a
# paren. For example
# my @fars = (
# qw<
# far
# farfar
# farfars-far
# >,
# );
if ( !defined($cti) && length($tok) == 1 ) {
# something other than ')', '}', ']' ; use flag for ')'
$cti = $closing_token_indentation{')'};
# But for now, do not outdent non-container qw
# delimiters because it would would change existing
# formatting.
if ( $tok ne '>' ) { $cti = 3 }
}
# A non-welded closing qw cannot currently use -cti=1
# because that option requires a sequence number to find
# the opening indentation, and qw quote delimiters are not
# sequenced items.
if ( defined($cti) && $cti == 1 ) { $cti = 0 }
}
if ( !defined($cti) ) {
# $cti may not be defined for several reasons.
# -padding may have been applied so the character
# has a length > 1
# - we may have welded to a closing quote token.
# Here is an example (perltidy -wn):
# __PACKAGE__->load_components( qw(
# > Core
# >
# > ) );
$adjust_indentation = 0;
}
elsif ( $cti == 1 ) {
if ( $i_terminal <= $ibeg + 1
|| $is_semicolon_terminated )
{
$adjust_indentation = 2;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 2 ) {
if ($is_semicolon_terminated) {
$adjust_indentation = 3;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 3 ) {
$adjust_indentation = 3;
}
else {
## cti == 0
}
}
# handle option to indent blocks
else {
if (
$rOpts_indent_closing_brace
&& (
$i_terminal == $ibeg # isolated terminal '}'
|| $is_semicolon_terminated
)
) # } xxxx ;
{
$adjust_indentation = 3;
}
}
} ## end if ( $is_closing_type_beg || $seqno_qw_closing )
# if line begins with a ':', align it with any
# previous line leading with corresponding ?
elsif ( $type_beg eq ':' ) {
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= $self->get_opening_indentation( $ibeg,
$rindentation_list, undef );
if ($is_leading) { $adjust_indentation = 2; }
}
else {
# not a closing type
}
# A final check: reset the flag value from 1 to 0 if moving left would
# give this closing token less indentation than the line with its
# opening token. We could do this check at the top for more efficiency
# except for -lp. For -lp, if the $adjust_indentation flag flips from
# 1 to 2, then the -lp logic can do a better recovery if it knows that
# the $default_adjust_indentation=1 instead of 0 (c435)
if ( $adjust_indentation == 1 ) {
my $no_left_adjustment_space = defined($opening_indentation)
&& get_spaces($leading_spaces_beg) <=
get_spaces($opening_indentation);
if ($no_left_adjustment_space) {
$adjust_indentation = 0;
}
}
return (
$adjust_indentation,
$default_adjust_indentation,
$opening_indentation,
$opening_offset,
$is_leading,
$opening_exists,
);
} ## end sub get_closing_token_indentation
} ## end closure get_final_indentation
sub get_opening_indentation {
# get the indentation of the line which output the opening token
# corresponding to a given closing token in the current output batch.
#
# given:
# $i_closing - index in this line of a closing token ')' '}' or ']'
#
# $ri_first - reference to list of the first index $i for each output
# line in this batch
# $ri_last - reference to list of the last index $i for each output line
# in this batch
# $rindentation_list - reference to a list containing the indentation
# used for each line.
# $qw_seqno - optional sequence number to use if normal seqno not defined
# (NOTE: would be more general to just look this up from index i)
#
# return:
# -the indentation of the line which contained the opening token
# which matches the token at index $i_opening
# -and its offset (number of columns) from the start of the line
#
my (
$self,
$i_closing,
$rindentation_list,
$qw_seqno
) = @_;
# first, see if the opening token is in the current batch
my $i_opening = $mate_index_to_go[$i_closing];
my ( $indent, $offset, $is_leading, $exists );
$exists = 1;
if ( defined($i_opening) && $i_opening >= 0 ) {
# it is..look up the indentation
( $indent, $offset, $is_leading ) =
$self->lookup_opening_indentation( $i_opening, $rindentation_list );
}
# if not, it should have been stored in the hash by a previous batch
else {
my $seqno = $type_sequence_to_go[$i_closing];
$seqno = $qw_seqno unless ($seqno);
( $indent, $offset, $is_leading, $exists ) =
get_saved_opening_indentation($seqno);
}
return ( $indent, $offset, $is_leading, $exists );
} ## end sub get_opening_indentation
sub examine_vertical_tightness_flags {
my ($self) = @_;
# For efficiency, we will set a flag to skip all calls to sub
# 'set_vertical_tightness_flags' if vertical tightness is not possible with
# the user input parameters. If vertical tightness is possible, we will
# simply leave the flag undefined and return.
# Vertical tightness is never possible with --freeze-whitespace
if ($rOpts_freeze_whitespace) {
$self->[_no_vertical_tightness_flags_] = 1;
return;
}
# This sub is coordinated with sub set_vertical_tightness_flags.
# The Section numbers in the following comments are the sections
# in sub set_vertical_tightness_flags:
# Examine controls for Section 1a:
return if ($rOpts_line_up_parentheses);
foreach my $key ( keys %opening_vertical_tightness ) {
return if ( $opening_vertical_tightness{$key} );
}
# Examine controls for Section 1b:
foreach my $key ( keys %closing_vertical_tightness ) {
return if ( $closing_vertical_tightness{$key} );
}
# Examine controls for Section 1c:
foreach my $key ( keys %opening_token_right ) {
return if ( $opening_token_right{$key} );
}
# Examine controls for Section 1d:
foreach my $key ( keys %stack_opening_token ) {
return if ( $stack_opening_token{$key} );
}
foreach my $key ( keys %stack_closing_token ) {
return if ( $stack_closing_token{$key} );
}
# Examine controls for Section 2:
return if ($rOpts_block_brace_vertical_tightness);
# Examine controls for Section 3:
return if ($rOpts_stack_closing_block_brace);
# None of the controls used for vertical tightness are set, so
# we can skip all calls to sub set_vertical_tightness_flags
$self->[_no_vertical_tightness_flags_] = 1;
return;
} ## end sub examine_vertical_tightness_flags
my %is_uncovered_operator;
# b1060, b1499
BEGIN {
my @q = qw( ? : && || );
@is_uncovered_operator{@q} = (1) x scalar(@q);
}
sub set_vertical_tightness_flags {
my ( $self, $nline, $closing_side_comment ) = @_;
# Given:
# $nline = index of this line in the current output batch
# $closing_side_comment = true if line has side comment
# Define vertical tightness controls for the nth line of a batch.
# Note: do not call this sub for a block comment or if
# $rOpts_freeze_whitespace is set.
# These parameters are passed to the vertical aligner to indicated
# if we should combine this line with the next line to achieve the
# desired vertical tightness. This was previously an array but
# has been converted to a hash:
# old hash Meaning
# index key
#
# 0 _vt_type: 1=opening non-block 2=closing non-block
# 3=opening block brace 4=closing block brace
#
# 1a _vt_opening_flag: 1=no multiple steps, 2=multiple steps ok
# 1b _vt_closing_flag: spaces of padding to use if closing
# 2 _vt_seqno: sequence number of container
# 3 _vt_valid flag: do not append if this flag is false. Will be
# true if appropriate -vt flag is set. Otherwise, Will be
# made true only for 2 line container in parens with -lp
# 4 _vt_seqno_beg: sequence number of first token of line
# 5 _vt_seqno_end: sequence number of last token of line
# 6 _vt_min_lines: min number of lines for joining opening cache,
# 0=no constraint
# 7 _vt_max_lines: max number of lines for joining opening cache,
# 0=no constraint
# The vertical tightness mechanism can add whitespace, so whitespace can
# continually increase if we allowed it when the -fws flag is set.
# See case b499 for an example.
# Uses these global parameters:
# $rOpts_block_brace_tightness
# $rOpts_block_brace_vertical_tightness
# $rOpts_stack_closing_block_brace
# $rOpts_line_up_parentheses
# %opening_vertical_tightness
# %closing_vertical_tightness
# %opening_token_right
# %stack_closing_token
# %stack_opening_token
# Pull out needed batch variables
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
my $n_last_line = @{$ri_first} - 1;
if ( $nline < 0 || $nline > $n_last_line ) {
DEVEL_MODE && Fault("bad line index '$nline' ; max is $n_last_line\n");
return;
}
my $ibeg = $ri_first->[$nline];
my $iend = $ri_last->[$nline];
# Fix for b1503
my $is_under_stress = $levels_to_go[$ibeg] > $high_stress_level;
# Define these values for each vertical tightness type:
my (
$vt_type,
$vt_opening_flag,
$vt_closing_flag,
$vt_seqno,
$vt_valid_flag,
$vt_min_lines,
$vt_max_lines
);
# get the sequence numbers of the ends of this line
my $vt_seqno_beg =
$type_sequence_to_go[$ibeg] ? $type_sequence_to_go[$ibeg]
: $types_to_go[$ibeg] eq 'q' ? $self->get_seqno($ibeg)
: EMPTY_STRING;
my $vt_seqno_end =
$type_sequence_to_go[$iend] ? $type_sequence_to_go[$iend]
: $types_to_go[$iend] eq 'q' ? $self->get_seqno($iend)
: EMPTY_STRING;
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1:
# Handle Lines 1 .. n-1 but not the last line
# For non-BLOCK tokens, we will need to examine the next line
# too, so we won't consider the last line.
#--------------------------------------------------------------
if ( $nline < $n_last_line ) {
# NOTE: Section 1 has 4 sub-sections: 1a, 1b, 1c, and 1d. The logic to
# reach any of these end states is complex, and it is possible but very
# unlikely that more than one of these end states could be reached.
# The current logic is to keep going and use the last such state.
# There are currently no known instances where multiple end states can
# be reached, but it is something to be aware of when making changes.
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1a:
# Look for Type 1, last token of this line is a non-block opening token
#--------------------------------------------------------------
my $ibeg_next = $ri_first->[ $nline + 1 ];
my $token_end = $tokens_to_go[$iend];
my $iend_next = $ri_last->[ $nline + 1 ];
if (
$type_sequence_to_go[$iend]
&& !$block_type_to_go[$iend]
&& $is_opening_token{$token_end}
# minimal fix for b1503; this also works ok without the 'w' check
# but that changes more existing code.
&& !( $is_under_stress && $types_to_go[$ibeg_next] eq 'w' )
&& (
$opening_vertical_tightness{$token_end} > 0
# allow 2-line method call to be closed up
|| ( $rOpts_line_up_parentheses
&& $token_end eq '('
&& $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$iend] }
&& $iend > $ibeg
&& $types_to_go[ $iend - 1 ] ne 'b' )
)
)
{
# avoid multiple jumps in nesting depth in one line if
# requested
my $ovt = $opening_vertical_tightness{$token_end};
# if we are in -lp and the next line ends in a weld..
if ( $rOpts_line_up_parentheses
&& $self->[_rK_weld_left_]->{ $K_to_go[$iend_next] } )
{
my $type_end_next = $types_to_go[$iend_next];
# Turn off -vt if the next line ends in a closing token. This
# avoids an instability with one-line welds (b1183).
if ( $is_closing_type{$type_end_next} ) {
$ovt = 0;
}
# Turn off -vt if the next line ends in an opening token. This
# avoids an instability (b1460).
elsif ( $is_opening_type{$type_end_next} ) {
$ovt = 0;
}
# neither opening nor closing
else {
}
}
# The flag '_rbreak_container_' avoids conflict of -bom and -pt=1
# or -pt=2; fixes b1270. See similar patch above for $cvt.
my $seqno = $type_sequence_to_go[$iend];
if ( $ovt
&& $seqno
&& $self->[_rbreak_container_]->{$seqno} )
{
$ovt = 0;
}
# The flag '_rmax_vertical_tightness_' avoids welding conflicts.
if ( defined( $self->[_rmax_vertical_tightness_]->{$seqno} ) ) {
$ovt =
min( $ovt, $self->[_rmax_vertical_tightness_]->{$seqno} );
}
if (
$ovt >= 2
|| ( $nesting_depth_to_go[ $iend_next + 1 ] ==
$nesting_depth_to_go[$ibeg_next] )
)
{
# If -vt flag has not been set, mark this as invalid
# and aligner will validate it if it sees the closing paren
# within 2 lines.
my $valid_flag = $ovt;
$vt_type = 1;
$vt_opening_flag = $ovt;
$vt_closing_flag = 0;
$vt_seqno = $type_sequence_to_go[$iend];
$vt_valid_flag = $valid_flag;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1b:
# Look for Type 2, first token of next line is a non-block closing
# token .. and be sure this line does not have a side comment
#--------------------------------------------------------------
my $token_next = $tokens_to_go[$ibeg_next];
if (
$type_sequence_to_go[$ibeg_next]
&& !$block_type_to_go[$ibeg_next]
&& $is_closing_token{$token_next}
&& !$self->[_rbreak_container_]
->{ $type_sequence_to_go[$ibeg_next] } # b1498
&& $types_to_go[$iend] ne '#'
) # for safety, shouldn't happen!
{
my $cvt = $closing_vertical_tightness{$token_next};
my $seqno = $type_sequence_to_go[$ibeg_next];
# Avoid conflict of -bom and -pvt=1 or -pvt=2, fixes b977, b1303
# See similar patch above for $ovt.
# NOTE: this is overriden by fix for b1498 above and can
# eventually be removed.
if ( 0 && $cvt && $self->[_rbreak_container_]->{$seqno} ) {
$cvt = 0;
}
# Implement cvt=3: like cvt=0 for assigned structures, like cvt=1
# otherwise. Added for rt136417.
if ( $cvt == 3 ) {
$cvt = $self->[_ris_assigned_structure_]->{$seqno} ? 0 : 1;
}
# The unusual combination -pvtc=2 -dws -naws can be unstable.
# This fixes b1282, b1283. This can be moved to set_options.
if ( $cvt == 2
&& $rOpts_delete_old_whitespace
&& !$rOpts_add_whitespace )
{
$cvt = 1;
}
# Fix for b1379, b1380, b1381, b1382, b1384 part 2,
# instability with adding and deleting trailing commas:
# Reducing -cvt=2 to =1 fixes stability for -wtc=b in b1379,1380.
# Reducing -cvt>0 to =0 fixes stability for -wtc=b in b1381,1382.
# Reducing -cvt>0 to =0 fixes stability for -wtc=m in b1384
if ( $cvt
&& $self->[_ris_bare_trailing_comma_by_seqno_]->{$seqno} )
{
$cvt = 0;
}
if (
# Never append a trailing line like ')->pack(' because it
# will throw off later alignment. So this line must start at a
# deeper level than the next line (fix1 for welding, git #45).
(
$nesting_depth_to_go[$ibeg_next] >=
$nesting_depth_to_go[ $iend_next + 1 ] + 1
)
&& (
$cvt == 2
|| (
!$self->is_in_list_by_i($ibeg_next)
&& (
$cvt == 1
# allow closing up 2-line method calls
|| ( $rOpts_line_up_parentheses
&& $token_next eq ')'
&& $type_sequence_to_go[$ibeg_next]
&& $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$ibeg_next] } )
)
)
)
)
{
# decide which trailing closing tokens to append..
my $ok = 0;
if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
else {
my $str = join( EMPTY_STRING,
@types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
# append closing token if followed by comment or ';'
# or another closing token (fix2 for welding, git #45)
if ( $str =~ /^b?[\)\]\}R#;]/ ) { $ok = 1 }
}
if ($ok) {
my $valid_flag = $cvt;
my $min_lines = 0;
my $max_lines = 0;
# Fix for b1187 and b1188: Blinking can occur if we allow
# welded tokens to re-form into one-line blocks during
# vertical alignment when -lp used. So for this case we
# set the minimum number of lines to be 1 instead of 0.
# The maximum should be 1 if -vtc is not used. If -vtc is
# used, we turn the valid
# flag off and set the maximum to 0. This is equivalent to
# using a large number.
my $seqno_ibeg_next = $type_sequence_to_go[$ibeg_next];
if ( $rOpts_line_up_parentheses
&& $total_weld_count
&& $seqno_ibeg_next
&& $self->[_rlp_object_by_seqno_]->{$seqno_ibeg_next}
&& $self->is_welded_at_seqno($seqno_ibeg_next) )
{
$min_lines = 1;
$max_lines = $cvt ? 0 : 1;
$valid_flag = 0;
}
$vt_type = 2;
$vt_opening_flag = 0;
$vt_closing_flag = $tightness{$token_next} == 2 ? 0 : 1;
$vt_seqno = $type_sequence_to_go[$ibeg_next];
$vt_valid_flag = $valid_flag;
$vt_min_lines = $min_lines;
$vt_max_lines = $max_lines;
}
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1c:
# Implement the Opening Token Right flag (Type 2)..
# If requested, move an isolated trailing opening token to the end of
# the previous line which ended in a comma. We could do this
# in sub recombine_breakpoints but that would cause problems
# with -lp formatting. The problem is that indentation will
# quickly move far to the right in nested expressions. By
# doing it after indentation has been set, we avoid changes
# to the indentation. Actual movement of the token takes place
# in sub valign_output_step_B.
# Note added 4 May 2021: the man page suggests that the -otr flags
# are mainly for opening tokens following commas. But this seems
# to have been generalized long ago to include other situations.
# I checked the coding back to 2012 and it is essentially the same
# as here, so it is best to leave this unchanged for now.
#--------------------------------------------------------------
if (
$opening_token_right{ $tokens_to_go[$ibeg_next] }
# previous line is not opening
# (use -sot to combine with it)
&& !$is_opening_token{$token_end}
# previous line ended in one of these
# (add other cases if necessary; '=>' and '.' are not necessary
&& !$block_type_to_go[$ibeg_next]
# this is a line with just an opening token
&& ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
# Fix for case b1060 when both -baoo and -otr are set:
# to avoid blinking, honor the -baoo flag over the -otr flag.
# b1499 added ? and : for same reason
##&& $token_end ne '||' && $token_end ne '&&'
&& !$is_uncovered_operator{$token_end}
# Keep break after '=' if -lp. Fixes b964 b1040 b1062 b1083 b1089.
# Generalized from '=' to $is_assignment to fix b1375.
&& !(
$is_assignment{ $types_to_go[$iend] }
&& $rOpts_line_up_parentheses
&& $type_sequence_to_go[$ibeg_next]
&& $self->[_rlp_object_by_seqno_]
->{ $type_sequence_to_go[$ibeg_next] }
)
# looks bad if we align vertically with the wrong container
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
# give -kba priority over -otr (b1445)
&& !$self->[_rbreak_after_Klast_]->{ $K_to_go[$iend] }
# Fix b1462, b1463: avoid possible edge instability with
# the combination -xlp and -dws
&& !(
$rOpts_extended_line_up_parentheses
&& $rOpts_delete_old_whitespace
)
)
{
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
$vt_type = 2;
$vt_opening_flag = 0;
$vt_closing_flag = $spaces;
$vt_seqno = $type_sequence_to_go[$ibeg_next];
$vt_valid_flag = 1;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 1d:
# Stacking of opening and closing tokens (Type 2)
#--------------------------------------------------------------
my $stackable;
my $token_beg_next = $tokens_to_go[$ibeg_next];
# patch to make something like 'qw(' behave like an opening paren
# (aran.t)
if ( $types_to_go[$ibeg_next] eq 'q' ) {
if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
$token_beg_next = $1;
}
}
if ( $is_closing_token{$token_end}
&& $is_closing_token{$token_beg_next} )
{
# avoid instability of combo -bom and -sct; b1179
my $seq_next = $type_sequence_to_go[$ibeg_next];
my $bom = $seq_next && $self->[_rbreak_container_]->{$seq_next};
$stackable = $stack_closing_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] || $bom );
}
elsif ($is_opening_token{$token_end}
&& $is_opening_token{$token_beg_next} )
{
$stackable = $stack_opening_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] ); # shouldn't happen
}
else {
## not stackable
}
if ($stackable) {
my $is_semicolon_terminated;
if ( $nline + 1 == $n_last_line ) {
my $terminal_type = terminal_type_i( $ibeg_next, $iend_next );
$is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend_next] <
$nesting_depth_to_go[$ibeg_next];
}
# this must be a line with just an opening token
# or end in a semicolon
if (
$is_semicolon_terminated
|| ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
)
{
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
$vt_type = 2;
$vt_opening_flag = 0;
$vt_closing_flag = $spaces;
$vt_seqno = $type_sequence_to_go[$ibeg_next];
$vt_valid_flag = 1;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
}
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 2:
# Handle type 3, opening block braces on last line of the batch
# Check for a last line with isolated opening BLOCK curly
#--------------------------------------------------------------
elsif ($rOpts_block_brace_vertical_tightness
&& $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend]
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/ )
{
$vt_type = 3;
$vt_opening_flag = $rOpts_block_brace_vertical_tightness;
$vt_closing_flag = 0;
$vt_seqno = 0;
$vt_valid_flag = 1;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
#--------------------------------------------------------------
# Vertical Tightness Flags Section 3:
# Handle type 4, a closing block brace on the last line of the batch Check
# for a last line with isolated closing BLOCK curly
# Patch: added a check for any new closing side comment which the
# -csc option may generate. If it exists, there will be a side comment
# so we cannot combine with a brace on the next line. This issue
# occurs for the combination -scbb and -csc is used.
#--------------------------------------------------------------
elsif ($rOpts_stack_closing_block_brace
&& $ibeg eq $iend
&& $block_type_to_go[$iend]
&& $types_to_go[$iend] eq '}'
&& ( !$closing_side_comment || $nline < $n_last_line ) )
{
my $spaces = $rOpts_block_brace_tightness == 2 ? 0 : 1;
$vt_type = 4;
$vt_opening_flag = 0;
$vt_closing_flag = $spaces;
$vt_seqno = $type_sequence_to_go[$iend];
$vt_valid_flag = 1;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
else {
# no -vt flags apply
}
my $last_vt_type = $self->[_last_vt_type_];
$self->[_last_vt_type_] = $vt_type;
if ( !$vt_type ) {
# Make a simple return if this line is not involved in vertical
# tightness at all.
if ( !$vt_seqno_beg
&& !$vt_seqno_end
&& !$last_vt_type )
{
return;
}
$vt_type = 0;
$vt_opening_flag = 0;
$vt_closing_flag = 0;
$vt_seqno = 0;
$vt_valid_flag = 0;
$vt_min_lines = 0;
$vt_max_lines = 0;
}
else {
# guard against undefined sequence numbers pulled from _to_go
if ( !defined($vt_seqno) ) { $vt_seqno = 0 }
}
# return the full data structure
return {
_vt_type => $vt_type,
_vt_opening_flag => $vt_opening_flag,
_vt_closing_flag => $vt_closing_flag,
_vt_seqno => $vt_seqno,
_vt_valid_flag => $vt_valid_flag,
_vt_min_lines => $vt_min_lines,
_vt_max_lines => $vt_max_lines,
_vt_seqno_beg => $vt_seqno_beg,
_vt_seqno_end => $vt_seqno_end,
};
} ## end sub set_vertical_tightness_flags
##########################################################
# CODE SECTION 14: Code for creating closing side comments
##########################################################
{ ## begin closure accumulate_csc_text
# These routines are called once per batch when the --closing-side-comments flag
# has been set.
my $rblock_leading_text;
my %block_opening_line_number;
my $csc_new_statement_ok;
my $csc_last_label;
my %csc_block_label;
my $accumulating_text_for_block;
my $leading_block_text;
my $rleading_block_if_elsif_text;
my $leading_block_text_level;
my $leading_block_text_length_exceeded;
my $leading_block_text_line_length;
my $leading_block_text_line_number;
sub initialize_csc_vars {
$rblock_leading_text = {};
%block_opening_line_number = ();
$csc_new_statement_ok = 1;
$csc_last_label = EMPTY_STRING;
%csc_block_label = ();
$rleading_block_if_elsif_text = [];
$accumulating_text_for_block = EMPTY_STRING;
reset_block_text_accumulator();
return;
} ## end sub initialize_csc_vars
sub reset_block_text_accumulator {
# save text after 'if' and 'elsif' to append after 'else'
if ($accumulating_text_for_block) {
## ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
if ( $is_if_elsif{$accumulating_text_for_block} ) {
push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
$accumulating_text_for_block = EMPTY_STRING;
$leading_block_text = EMPTY_STRING;
$leading_block_text_level = 0;
$leading_block_text_length_exceeded = 0;
$leading_block_text_line_number = 0;
$leading_block_text_line_length = 0;
return;
} ## end sub reset_block_text_accumulator
sub set_block_text_accumulator {
my ( $self, $i ) = @_;
$accumulating_text_for_block = $tokens_to_go[$i];
if ( $accumulating_text_for_block !~ /^els/ ) {
$rleading_block_if_elsif_text = [];
}
$leading_block_text = EMPTY_STRING;
$leading_block_text_level = $levels_to_go[$i];
$leading_block_text_line_number = $self->get_output_line_number();
$leading_block_text_length_exceeded = 0;
# this will contain the column number of the last character
# of the closing side comment
$leading_block_text_line_length =
length($csc_last_label) +
length($accumulating_text_for_block) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$leading_block_text_level * $rOpts_indent_columns + 3;
return;
} ## end sub set_block_text_accumulator
sub accumulate_block_text {
my ( $self, $i ) = @_;
# accumulate leading text for -csc, ignoring any side comments
if ( $accumulating_text_for_block
&& !$leading_block_text_length_exceeded
&& $types_to_go[$i] ne '#' )
{
my $added_length = $token_lengths_to_go[$i];
$added_length += 1 if $i == 0;
my $new_line_length =
$leading_block_text_line_length + $added_length;
# we can add this text if we don't exceed some limits..
if (
# we must not have already exceeded the text length limit
length($leading_block_text) <
$rOpts_closing_side_comment_maximum_text
# and either:
# the new total line length must be below the line length limit
# or the new length must be below the text length limit
# (ie, we may allow one token to exceed the text length limit)
&& (
$new_line_length <
$maximum_line_length_at_level[$leading_block_text_level]
|| length($leading_block_text) + $added_length <
$rOpts_closing_side_comment_maximum_text
)
# UNLESS: we are adding a closing paren before the brace we
# seek. This is an attempt to avoid situations where the ...
# to be added are longer than the omitted right paren, as in:
#foreach my $item (@a_rather_long_variable_name_here) {
# &whatever;
#} ## end foreach my $item (@a_rather_long_variable_name_here...
|| (
$tokens_to_go[$i] eq ')'
&& (
(
$i + 1 <= $max_index_to_go
&& $block_type_to_go[ $i + 1 ]
&& $block_type_to_go[ $i + 1 ] eq
$accumulating_text_for_block
)
|| ( $i + 2 <= $max_index_to_go
&& $block_type_to_go[ $i + 2 ]
&& $block_type_to_go[ $i + 2 ] eq
$accumulating_text_for_block )
)
)
)
{
# add an extra space at each newline
if ( $i == 0 && $types_to_go[$i] ne 'b' ) {
$leading_block_text .= SPACE;
}
# add the token text
$leading_block_text .= $tokens_to_go[$i];
$leading_block_text_line_length = $new_line_length;
}
# show that text was truncated if necessary
elsif ( $types_to_go[$i] ne 'b' ) {
$leading_block_text_length_exceeded = 1;
$leading_block_text .= '...';
}
else {
# not enough space to add text
}
}
return;
} ## end sub accumulate_block_text
sub accumulate_csc_text {
my ($self) = @_;
# called once per output buffer when -csc is used. Accumulates
# the text placed after certain closing block braces.
# Defines and returns the following for this buffer:
my $block_leading_text =
EMPTY_STRING; # the leading text of the last '}'
my $rblock_leading_if_elsif_text;
my $i_block_leading_text =
-1; # index of token owning block_leading_text
my $block_line_count = 100; # how many lines the block spans
my $terminal_type = 'b'; # type of last nonblank token
my $i_terminal = 0; # index of last nonblank token
my $terminal_block_type = EMPTY_STRING;
# update most recent statement label
$csc_last_label = EMPTY_STRING unless ($csc_last_label);
if ( $types_to_go[0] eq 'J' ) { $csc_last_label = $tokens_to_go[0] }
my $block_label = $csc_last_label;
# Loop over all tokens of this batch
for my $i ( 0 .. $max_index_to_go ) {
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
$block_type = EMPTY_STRING unless ($block_type);
# remember last nonblank token type
if ( $type ne '#' && $type ne 'b' ) {
$terminal_type = $type;
$terminal_block_type = $block_type;
$i_terminal = $i;
}
my $type_sequence = $type_sequence_to_go[$i];
if ( $block_type && $type_sequence ) {
if ( $token eq '}' ) {
# restore any leading text saved when we entered this block
if ( defined( $rblock_leading_text->{$type_sequence} ) ) {
( $block_leading_text, $rblock_leading_if_elsif_text )
= @{ $rblock_leading_text->{$type_sequence} };
$i_block_leading_text = $i;
delete $rblock_leading_text->{$type_sequence};
$rleading_block_if_elsif_text =
$rblock_leading_if_elsif_text;
}
if ( defined( $csc_block_label{$type_sequence} ) ) {
$block_label = $csc_block_label{$type_sequence};
delete $csc_block_label{$type_sequence};
}
# if we run into a '}' then we probably started accumulating
# at something like a trailing 'if' clause..no harm done.
if ( $accumulating_text_for_block
&& $levels_to_go[$i] <= $leading_block_text_level )
{
reset_block_text_accumulator();
}
if ( defined( $block_opening_line_number{$type_sequence} ) )
{
my $output_line_number =
$self->get_output_line_number();
$block_line_count =
$output_line_number -
$block_opening_line_number{$type_sequence} + 1;
delete $block_opening_line_number{$type_sequence};
}
else {
# Error: block opening line undefined for this line..
# This shouldn't be possible, but it is not a
# significant problem.
}
}
elsif ( $token eq '{' ) {
my $line_number = $self->get_output_line_number();
$block_opening_line_number{$type_sequence} = $line_number;
# set a label for this block, except for
# a bare block which already has the label
# A label can only be used on the next {
if ( $block_type =~ /:$/ ) {
$csc_last_label = EMPTY_STRING;
}
$csc_block_label{$type_sequence} = $csc_last_label;
$csc_last_label = EMPTY_STRING;
if ( $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
if ( $accumulating_text_for_block eq $block_type ) {
# save any leading text before we enter this block
$rblock_leading_text->{$type_sequence} = [
$leading_block_text,
$rleading_block_if_elsif_text
];
$block_opening_line_number{$type_sequence} =
$leading_block_text_line_number;
reset_block_text_accumulator();
}
else {
# shouldn't happen, but not a serious error.
# We were accumulating -csc text for block type
# $accumulating_text_for_block and unexpectedly
# encountered a '{' for block type $block_type.
}
}
}
else {
## should not get here
DEVEL_MODE
&& Fault("token=$token should be '{' or '}' for block\n");
}
}
if ( $type eq 'k'
&& $csc_new_statement_ok
&& $is_if_elsif_else_unless_while_until_for_foreach{$token}
&& $token =~ /$closing_side_comment_list_pattern/
&& $token !~ /$closing_side_comment_exclusion_pattern/ )
{
$self->set_block_text_accumulator($i);
}
else {
# note: ignoring type 'q' because of tricks being played
# with 'q' for hanging side comments
if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
$csc_new_statement_ok =
( $block_type || $type eq 'J' || $type eq ';' );
}
if ( $type eq ';'
&& $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
reset_block_text_accumulator();
}
else {
$self->accumulate_block_text($i);
}
}
}
# Treat an 'else' block specially by adding preceding 'if' and
# 'elsif' text. Otherwise, the 'end else' is not helpful,
# especially for cuddled-else formatting.
if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
$block_leading_text =
$self->make_else_csc_text( $i_terminal, $terminal_block_type,
$block_leading_text, $rblock_leading_if_elsif_text );
}
# if this line ends in a label then remember it for the next pass
$csc_last_label = EMPTY_STRING;
if ( $terminal_type eq 'J' ) {
$csc_last_label = $tokens_to_go[$i_terminal];
}
return ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count, $block_label );
} ## end sub accumulate_csc_text
sub make_else_csc_text {
# create additional -csc text for an 'else' and optionally 'elsif',
# depending on the value of switch
#
# = 0 add 'if' text to trailing else
# = 1 same as 0 plus:
# add 'if' to 'elsif's if can fit in line length
# add last 'elsif' to trailing else if can fit in one line
# = 2 same as 1 but do not check if exceed line length
#
# $rif_elsif_text = a reference to a list of all previous closing
# side comments created for this if block
#
my ( $self, $i_terminal, $block_type, $block_leading_text,
$rif_elsif_text )
= @_;
my $csc_text = $block_leading_text;
if ( $block_type eq 'elsif'
&& $rOpts_closing_side_comment_else_flag == 0 )
{
return $csc_text;
}
my $count = @{$rif_elsif_text};
return $csc_text unless ($count);
my $if_text = '[ if' . $rif_elsif_text->[0];
# always show the leading 'if' text on 'else'
if ( $block_type eq 'else' ) {
$csc_text .= $if_text;
}
# see if that's all
if ( $rOpts_closing_side_comment_else_flag == 0 ) {
return $csc_text;
}
my $last_elsif_text = EMPTY_STRING;
if ( $count > 1 ) {
$last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
}
# tentatively append one more item
my $saved_text = $csc_text;
if ( $block_type eq 'else' ) {
$csc_text .= $last_elsif_text;
}
else {
$csc_text .= SPACE . $if_text;
}
# all done if no length checks requested
if ( $rOpts_closing_side_comment_else_flag == 2 ) {
return $csc_text;
}
# undo it if line length exceeded
my $length =
length($csc_text) +
length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
if (
$length > $maximum_line_length_at_level[$leading_block_text_level] )
{
$csc_text = $saved_text;
}
return $csc_text;
} ## end sub make_else_csc_text
} ## end closure accumulate_csc_text
{ ## begin closure balance_csc_text
# Some additional routines for handling the --closing-side-comments option
my %matching_char;
BEGIN {
%matching_char = (
'{' => '}',
'(' => ')',
'[' => ']',
'}' => '{',
')' => '(',
']' => '[',
);
} ## end BEGIN
sub balance_csc_text {
# Append characters to balance a closing side comment so that editors
# such as vim can correctly jump through code.
# Simple Example:
# input = ## end foreach my $foo ( sort { $b ...
# output = ## end foreach my $foo ( sort { $b ...})
# NOTE: This routine does not currently filter out structures within
# quoted text because the bounce algorithms in text editors do not
# necessarily do this either (a version of vim was checked and
# did not do this).
# Some complex examples which will cause trouble for some editors:
# while ( $mask_string =~ /\{[^{]*?\}/g ) {
# if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) {
# if ( $1 eq '{' ) {
# test file test1/braces.pl has many such examples.
my ($csc) = @_;
# loop to examine characters one-by-one, RIGHT to LEFT and
# build a balancing ending, LEFT to RIGHT.
foreach my $pos ( reverse( 0 .. length($csc) - 1 ) ) {
my $char = substr( $csc, $pos, 1 );
# ignore everything except structural characters
next unless ( $matching_char{$char} );
# pop most recently appended character
my $top = chop $csc;
# push it back plus the mate to the newest character
# unless they balance each other.
$csc = $csc . $top . $matching_char{$char} unless ( $top eq $char );
}
# return the balanced string
return $csc;
} ## end sub balance_csc_text
} ## end closure balance_csc_text
sub get_asub_block_label {
my ( $self, $seqno ) = @_;
# Given:
# $seqno = the sequence number of an asub block
# Return:
# $block_label = the text # that will be displayed before 'sub' in its
# closing side comment.
# Note: see similar inline code in sub find_selected_blocks
# Example:
# my $doit = sub { ...
# |
# ^----------walk back from here to get
# $block_label = '$doit ='
my $block_label = EMPTY_STRING;
return $block_label unless ($seqno);
my $K_opening = $self->[_K_opening_container_]->{$seqno};
my $rLL = $self->[_rLL_];
return $block_label unless ($K_opening);
my $K_search_min = max( 0, $K_opening - 6 );
my ( $saw_equals, $saw_fat_comma, $blank_count, $nonblank_count );
my $text = EMPTY_STRING;
foreach my $KK ( reverse( $K_search_min .. $K_opening - 1 ) ) {
my $token_type = $rLL->[$KK]->[_TYPE_];
my $token = $rLL->[$KK]->[_TOKEN_];
# first nonblank, keyword 'sub', is not part of the label
if ($nonblank_count) { $text = $token . $text }
if ( $token_type eq 'b' ) { $blank_count++; next }
else { $nonblank_count++ }
if ( $token_type eq '=>' ) { $saw_fat_comma++; next }
if ( $token_type eq '=' ) { $saw_equals++; next }
if ( $token_type eq 'i' && $saw_equals
|| ( $token_type eq 'w' || $token_type eq 'Q' ) && $saw_fat_comma )
{
$block_label = $text;
$block_label =~ s/\s*$//;
last;
}
}
return $block_label;
} ## end sub get_asub_block_label
sub add_closing_side_comment {
my ($self) = @_;
my $rLL = $self->[_rLL_];
my $this_batch = $self->[_this_batch_];
my $ri_first = $this_batch->[_ri_first_];
my $ri_last = $this_batch->[_ri_last_];
# add closing side comments after closing block braces if -csc used
my ( $closing_side_comment, $cscw_block_comment );
#---------------------------------------------------------------
# Step 1: loop through all tokens of this line to accumulate
# the text needed to create the closing side comments. Also see
# how the line ends.
#---------------------------------------------------------------
my ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count, $block_label )
= $self->accumulate_csc_text();
#---------------------------------------------------------------
# Step 2: make the closing side comment if this ends a block
#---------------------------------------------------------------
my $have_side_comment = $types_to_go[$max_index_to_go] eq '#';
# Special check for asub closing side comments (c380)
# These are the only types which follow '};' instead of a bare '}'
if ( $terminal_type eq ';' && $closing_side_comment_want_asub ) {
if ( $types_to_go[0] eq '}'
&& $tokens_to_go[0] eq '}'
&& $i_terminal == $inext_to_go[0] )
{
my $seqno = $type_sequence_to_go[0];
if ( $self->[_ris_asub_block_]->{$seqno} ) {
# reset the terminal token to be the closing brace so
# that the code below ignores the trailing semicolon
$terminal_type = '}';
$i_terminal = 0;
# create a name for this asub block
$block_label = $self->get_asub_block_label($seqno);
}
}
}
# if this line might end in a block closure..
if (
$terminal_type eq '}'
# Fix 1 for c091, this is only for blocks
&& $block_type_to_go[$i_terminal]
# ..and either
&& (
# the block is long enough
( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
# or there is an existing comment to check
|| ( $have_side_comment
&& $rOpts->{'closing-side-comment-warnings'} )
)
# .. and if this is one of the types of interest
&& $block_type_to_go[$i_terminal] =~
/$closing_side_comment_list_pattern/
&& $block_type_to_go[$i_terminal] !~
/$closing_side_comment_exclusion_pattern/
# ..and the corresponding opening brace must is not in this batch
# (because we do not need to tag one-line blocks, although this
# should also be caught with a positive -csci value)
&& !defined( $mate_index_to_go[$i_terminal] )
# ..and either
&& (
# this is the last token (line doesn't have a side comment)
!$have_side_comment
# or the old side comment is a closing side comment
|| $tokens_to_go[$max_index_to_go] =~
/$closing_side_comment_prefix_pattern/
)
)
{
# then make the closing side comment text
if ($block_label) { $block_label .= SPACE }
my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_label$block_type_to_go[$i_terminal]";
# append any extra descriptive text collected above
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
$token = balance_csc_text($token)
if $rOpts->{'closing-side-comments-balanced'};
$token =~ s/\s+$//; # trim any trailing whitespace
# handle case of existing closing side comment
if ($have_side_comment) {
# warn if requested and tokens differ significantly
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
$new_csc =~ s/\s+//g; # trim all whitespace
$old_csc =~ s/\s+//g; # trim all whitespace
$new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
$old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures
# trim trailing '...'
my $new_trailing_dots = $new_csc =~ s/\.\.\.$//;
$old_csc =~ s/\.\.\.\s*$//;
# Patch to handle multiple closing side comments at
# else and elsif's. These have become too complicated
# to check, so if we see an indication of
# '[ if' or '[ # elsif', then assume they were made
# by perltidy.
if ( $block_type_to_go[$i_terminal] eq 'else' ) {
if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
}
elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
}
else {
# neither else or elsif
}
# if old comment is contained in new comment,
# only compare the common part.
if ( length($new_csc) > length($old_csc) ) {
$new_csc = substr( $new_csc, 0, length($old_csc) );
}
# if the new comment is shorter and has been limited,
# only compare the common part.
if ( length($new_csc) < length($old_csc)
&& $new_trailing_dots )
{
$old_csc = substr( $old_csc, 0, length($new_csc) );
}
# any remaining difference?
if ( $new_csc ne $old_csc ) {
# just leave the old comment if we are below the threshold
# for creating side comments
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
}
# otherwise we'll make a note of it
else {
my $msg_line_number;
my $K = $K_to_go[$i_terminal];
if ( defined($K) ) {
$msg_line_number = $rLL->[$K]->[_LINE_INDEX_] + 1;
}
warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n",
$msg_line_number
);
# save the old side comment in a new trailing block
# comment
my $timestamp = EMPTY_STRING;
if ( $rOpts->{'timestamp'} ) {
my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
$year += 1900;
$month += 1;
$timestamp = "$year-$month-$day";
}
$cscw_block_comment =
"## perltidy -cscw $timestamp: $tokens_to_go[$max_index_to_go]";
}
}
# No differences.. we can safely delete old comment if we
# are below the threshold
elsif ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
# Since the line breaks have already been set, we have
# to remove the token from the _to_go array and also
# from the line range (this fixes issue c081).
# Note that we can only get here if -cscw has been set
# because otherwise the old comment is already deleted.
$token = undef;
my $ibeg = $ri_first->[-1];
my $iend = $ri_last->[-1];
if ( $iend > $ibeg
&& $iend == $max_index_to_go
&& $types_to_go[$max_index_to_go] eq '#' )
{
$iend--;
$max_index_to_go--;
if ( $iend > $ibeg
&& $types_to_go[$max_index_to_go] eq 'b' )
{
$iend--;
$max_index_to_go--;
}
$ri_last->[-1] = $iend;
}
}
else {
## above threshold, cannot delete
}
}
# switch to the new csc (unless we deleted it!)
if ($token) {
my $len_tok = length($token); # NOTE: length no longer important
my $added_len =
$len_tok - $token_lengths_to_go[$max_index_to_go];
$tokens_to_go[$max_index_to_go] = $token;
$token_lengths_to_go[$max_index_to_go] = $len_tok;
my $K = $K_to_go[$max_index_to_go];
$rLL->[$K]->[_TOKEN_] = $token;
$rLL->[$K]->[_TOKEN_LENGTH_] = $len_tok;
$summed_lengths_to_go[ $max_index_to_go + 1 ] += $added_len;
}
}
# handle case of NO existing closing side comment
else {
# To avoid inserting a new token in the token arrays, we
# will just return the new side comment so that it can be
# inserted just before it is needed in the call to the
# vertical aligner.
$closing_side_comment = $token;
}
}
return ( $closing_side_comment, $cscw_block_comment );
} ## end sub add_closing_side_comment
############################
# CODE SECTION 15: Summarize
############################
sub wrapup {
# This is the last routine called when a file is formatted.
# Flush buffer and write any informative messages
my ( $self, ($severe_error) ) = @_;
# Optional parameter:
# $severe_error = true if program is ending on an error
# false for normal end
$self->flush();
my $file_writer_object = $self->[_file_writer_object_];
$file_writer_object->decrement_output_line_number()
; # fix up line number since it was incremented
we_are_at_the_last_line();
my $max_depth = $self->[_maximum_BLOCK_level_];
my $at_line = $self->[_maximum_BLOCK_level_at_line_];
write_logfile_entry(
"Maximum leading structural depth is $max_depth in input at line $at_line\n"
);
my $added_semicolon_count = $self->[_added_semicolon_count_];
my $first_added_semicolon_at = $self->[_first_added_semicolon_at_];
my $last_added_semicolon_at = $self->[_last_added_semicolon_at_];
if ( $added_semicolon_count > 0 ) {
my $first = ( $added_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
my $what =
( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
write_logfile_entry("$added_semicolon_count $what added:\n");
write_logfile_entry(
" $first at input line $first_added_semicolon_at\n");
if ( $added_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_added_semicolon_at\n");
}
write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
write_logfile_entry("\n");
}
my $deleted_semicolon_count = $self->[_deleted_semicolon_count_];
my $first_deleted_semicolon_at = $self->[_first_deleted_semicolon_at_];
my $last_deleted_semicolon_at = $self->[_last_deleted_semicolon_at_];
if ( $deleted_semicolon_count > 0 ) {
my $first = ( $deleted_semicolon_count > 1 ) ? "First" : EMPTY_STRING;
my $what =
( $deleted_semicolon_count > 1 )
? "semicolons were"
: "semicolon was";
write_logfile_entry(
"$deleted_semicolon_count unnecessary $what deleted:\n");
write_logfile_entry(
" $first at input line $first_deleted_semicolon_at\n");
if ( $deleted_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_deleted_semicolon_at\n");
}
write_logfile_entry(" (Use -ndsm to prevent semicolon deletion)\n");
write_logfile_entry("\n");
}
my $embedded_tab_count = $self->[_embedded_tab_count_];
my $first_embedded_tab_at = $self->[_first_embedded_tab_at_];
my $last_embedded_tab_at = $self->[_last_embedded_tab_at_];
if ( $embedded_tab_count > 0 ) {
my $first = ( $embedded_tab_count > 1 ) ? "First" : EMPTY_STRING;
my $what =
( $embedded_tab_count > 1 )
? "quotes or patterns"
: "quote or pattern";
write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
write_logfile_entry(
"This means the display of this script could vary with device or software\n"
);
write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
if ( $embedded_tab_count > 1 ) {
write_logfile_entry(
" Last at input line $last_embedded_tab_at\n");
}
write_logfile_entry("\n");
}
my $first_tabbing_disagreement = $self->[_first_tabbing_disagreement_];
my $last_tabbing_disagreement = $self->[_last_tabbing_disagreement_];
my $in_tabbing_disagreement = $self->[_in_tabbing_disagreement_];
if ($first_tabbing_disagreement) {
write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
);
}
my $first_btd = $self->[_first_brace_tabbing_disagreement_];
if ($first_btd) {
my $msg =
"First closing brace indentation disagreement started at input line $first_btd\n";
write_logfile_entry($msg);
# leave a hint in the .ERR file if there was a brace error
if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
my $in_btd = $self->[_in_brace_tabbing_disagreement_];
if ($in_btd) {
my $msg =
"Ending with brace indentation disagreement which started at input line $in_btd\n";
write_logfile_entry($msg);
# leave a hint in the .ERR file if there was a brace error
if ( get_saw_brace_error() ) { warning("NOTE: $msg") }
}
if ($in_tabbing_disagreement) {
my $msg =
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n";
write_logfile_entry($msg);
}
else {
if ($last_tabbing_disagreement) {
write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
);
}
else {
write_logfile_entry("No indentation disagreement seen\n");
}
}
if ($first_tabbing_disagreement) {
write_logfile_entry(
"Note: Indentation disagreement detection is not accurate for outdenting and -lp.\n"
);
}
write_logfile_entry("\n");
my $vao = $self->[_vertical_aligner_object_];
$vao->report_anything_unusual();
$file_writer_object->report_line_length_errors();
# Define the formatter self-check for convergence.
$self->[_converged_] = $severe_error
|| (!$self->[_want_second_iteration_]
&& $file_writer_object->get_convergence_check() )
|| $rOpts->{'indent-only'};
return;
} ## end sub wrapup
} ## end package Perl::Tidy::Formatter
1;