package Perl::Tidy;
use 5.004; BEGIN { $^W = 1; }
use strict;
use Exporter;
use Carp;
$|++;
use vars qw{
$VERSION
@ISA
@EXPORT
$missing_file_spec
};
@ISA = qw( Exporter );
@EXPORT = qw( &perltidy );
use IO::File;
use File::Basename;
BEGIN {
( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; }
sub streamhandle {
my $ref = ref( my $filename = shift );
my $mode = shift;
my $New;
my $fh;
if ($ref) {
if ( $ref eq 'ARRAY' ) {
$New = sub { Perl::Tidy::IOScalarArray->new(@_) };
}
elsif ( $ref eq 'SCALAR' ) {
$New = sub { Perl::Tidy::IOScalar->new(@_) };
}
else {
if ( $mode =~ /[rR]/ ) {
if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
$New = sub { $filename };
}
else {
$New = sub { undef };
confess <<EOM;
------------------------------------------------------------------------
No 'getline' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy. Trace follows.
------------------------------------------------------------------------
EOM
}
}
if ( $mode =~ /[wW]/ ) {
if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
$New = sub { $filename };
}
else {
$New = sub { undef };
confess <<EOM;
------------------------------------------------------------------------
No 'print' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy. Trace follows.
------------------------------------------------------------------------
EOM
}
}
}
}
else {
if ( $filename eq '-' ) {
$New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
}
else {
$New = sub { IO::File->new(@_) };
}
}
$fh = $New->( $filename, $mode )
or warn "Couldn't open file:$filename in mode:$mode : $!\n";
return $fh, ( $ref or $filename );
}
sub find_input_line_ending {
my ($input_file) = @_;
my $ending;
if ( ref($input_file) || $input_file eq '-' ) {
return $ending;
}
open( INFILE, $input_file ) || return $ending;
binmode INFILE;
my $buf;
read( INFILE, $buf, 1024 );
close INFILE;
if ( $buf && $buf =~ /([\012\015]+)/ ) {
my $test = $1;
if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
else { }
}
else { }
return $ending;
}
sub catfile {
BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
unless ($missing_file_spec) {
return File::Spec->catfile(@_);
}
my $name = pop @_;
my $path = join '/', @_;
my $test_file = $path . $name;
my ( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
return undef if ( $^O eq 'VMS' );
$test_file = $path . '/' . $name;
( $test_name, $test_path ) = fileparse($test_file);
return $test_file if ( $test_name eq $name );
return undef;
}
sub make_temporary_filename {
my $name = "perltidy.TMP";
if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
return $name;
}
eval "use POSIX qw(tmpnam)";
if ($@) { return $name }
use IO::File;
for ( 0 .. 1 ) {
my $tmpname = tmpnam();
my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
if ($fh) {
$fh->close();
return ($tmpname);
last;
}
}
return ($name);
}
{
my $tokenizer;
my $input_file;
sub interrupt_handler {
my $exit_flag = shift;
print STDERR "perltidy interrupted";
if ($tokenizer) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
print STDERR " at line $input_line_number";
}
if ($input_file) {
if ( ref $input_file ) { print STDERR " of reference to:" }
else { print STDERR " of file:" }
print STDERR " $input_file";
}
print STDERR "\n";
exit $exit_flag if defined($exit_flag);
}
sub perltidy {
my %defaults = (
argv => undef,
destination => undef,
formatter => undef,
logfile => undef,
errorfile => undef,
perltidyrc => undef,
source => undef,
stderr => undef,
dump_options => undef,
dump_options_type => undef,
dump_getopt_flags => undef,
dump_options_category => undef,
dump_options_range => undef,
dump_abbreviations => undef,
);
local @ARGV = @ARGV;
my %input_hash = @_;
if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
local $" = ')(';
my @good_keys = sort keys %defaults;
@bad_keys = sort @bad_keys;
confess <<EOM;
------------------------------------------------------------------------
Unknown perltidy parameter : (@bad_keys)
perltidy only understands : (@good_keys)
------------------------------------------------------------------------
EOM
}
my $get_hash_ref = sub {
my ($key) = @_;
my $hash_ref = $input_hash{$key};
if ( defined($hash_ref) ) {
unless ( ref($hash_ref) eq 'HASH' ) {
my $what = ref($hash_ref);
my $but_is =
$what ? "but is ref to $what" : "but is not a reference";
croak <<EOM;
------------------------------------------------------------------------
error in call to perltidy:
-$key must be reference to HASH $but_is
------------------------------------------------------------------------
EOM
}
}
return $hash_ref;
};
%input_hash = ( %defaults, %input_hash );
my $argv = $input_hash{'argv'};
my $destination_stream = $input_hash{'destination'};
my $errorfile_stream = $input_hash{'errorfile'};
my $logfile_stream = $input_hash{'logfile'};
my $perltidyrc_stream = $input_hash{'perltidyrc'};
my $source_stream = $input_hash{'source'};
my $stderr_stream = $input_hash{'stderr'};
my $user_formatter = $input_hash{'formatter'};
# various dump parameters
my $dump_options_type = $input_hash{'dump_options_type'};
my $dump_options = $get_hash_ref->('dump_options');
my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
my $dump_options_category = $get_hash_ref->('dump_options_category');
my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
my $dump_options_range = $get_hash_ref->('dump_options_range');
# validate dump_options_type
if ( defined($dump_options) ) {
unless ( defined($dump_options_type) ) {
$dump_options_type = 'perltidyrc';
}
unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
croak <<EOM;
------------------------------------------------------------------------
Please check value of -dump_options_type in call to perltidy;
saw: '$dump_options_type'
expecting: 'perltidyrc' or 'full'
------------------------------------------------------------------------
EOM
}
}
else {
$dump_options_type = "";
}
if ($user_formatter) {
# if the user defines a formatter, there is no output stream,
# but we need a null stream to keep coding simple
$destination_stream = Perl::Tidy::DevNull->new();
}
# see if ARGV is overridden
if ( defined($argv) ) {
my $rargv = ref $argv;
if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
# ref to ARRAY
if ($rargv) {
if ( $rargv eq 'ARRAY' ) {
@ARGV = @$argv;
}
else {
croak <<EOM;
------------------------------------------------------------------------
Please check value of -argv in call to perltidy;
it must be a string or ref to ARRAY but is: $rargv
------------------------------------------------------------------------
EOM
}
}
# string
else {
my ( $rargv, $msg ) = parse_args($argv);
if ($msg) {
die <<EOM;
Error parsing this string passed to to perltidy with 'argv':
$msg
EOM
}
@ARGV = @{$rargv};
}
}
# redirect STDERR if requested
if ($stderr_stream) {
my ( $fh_stderr, $stderr_file ) =
Perl::Tidy::streamhandle( $stderr_stream, 'w' );
if ($fh_stderr) { *STDERR = $fh_stderr }
else {
croak <<EOM;
------------------------------------------------------------------------
Unable to redirect STDERR to $stderr_stream
Please check value of -stderr in call to perltidy
------------------------------------------------------------------------
EOM
}
}
my $rpending_complaint;
$$rpending_complaint = "";
my $rpending_logfile_message;
$$rpending_logfile_message = "";
my ( $is_Windows, $Windows_type ) =
look_for_Windows($rpending_complaint);
# VMS file names are restricted to a 40.40 format, so we append _tdy
# instead of .tdy, etc. (but see also sub check_vms_filename)
my $dot;
my $dot_pattern;
if ( $^O eq 'VMS' ) {
$dot = '_';
$dot_pattern = '_';
}
else {
$dot = '.';
$dot_pattern = '\.'; # must escape for use in regex
}
# handle command line options
my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
$rexpansion, $roption_category, $roption_range )
= process_command_line(
$perltidyrc_stream, $is_Windows, $Windows_type,
$rpending_complaint, $dump_options_type,
);
# return or exit immediately after all dumps
my $quit_now = 0;
# Getopt parameters and their flags
if ( defined($dump_getopt_flags) ) {
$quit_now = 1;
foreach my $op ( @{$roption_string} ) {
my $opt = $op;
my $flag = "";
# Examples:
# some-option=s
# some-option=i
# some-option:i
# some-option!
if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
$opt = $1;
$flag = $2;
}
$dump_getopt_flags->{$opt} = $flag;
}
}
if ( defined($dump_options_category) ) {
$quit_now = 1;
%{$dump_options_category} = %{$roption_category};
}
if ( defined($dump_options_range) ) {
$quit_now = 1;
%{$dump_options_range} = %{$roption_range};
}
if ( defined($dump_abbreviations) ) {
$quit_now = 1;
%{$dump_abbreviations} = %{$rexpansion};
}
if ( defined($dump_options) ) {
$quit_now = 1;
%{$dump_options} = %{$rOpts};
}
return if ($quit_now);
# make printable string of options for this run as possible diagnostic
my $readable_options = readable_options( $rOpts, $roption_string );
# dump from command line
if ( $rOpts->{'dump-options'} ) {
print STDOUT $readable_options;
exit 1;
}
check_options( $rOpts, $is_Windows, $Windows_type,
$rpending_complaint );
if ($user_formatter) {
$rOpts->{'format'} = 'user';
}
# there must be one entry here for every possible format
my %default_file_extension = (
tidy => 'tdy',
html => 'html',
user => '',
);
# be sure we have a valid output format
unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
my $formats = join ' ',
sort map { "'" . $_ . "'" } keys %default_file_extension;
my $fmt = $rOpts->{'format'};
die "-format='$fmt' but must be one of: $formats\n";
}
my $output_extension =
make_extension( $rOpts->{'output-file-extension'},
$default_file_extension{ $rOpts->{'format'} }, $dot );
my $backup_extension =
make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
my $html_toc_extension =
make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
my $html_src_extension =
make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
# check for -b option;
my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
&& $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
&& @ARGV > 0; # silently ignore if standard input;
# this allows -b to be in a .perltidyrc file
# without error messages when running from an editor
# turn off -b with warnings in case of conflicts with other options
if ($in_place_modify) {
if ( $rOpts->{'standard-output'} ) {
warn "Ignoring -b; you may not use -b and -st together\n";
$in_place_modify = 0;
}
if ($destination_stream) {
warn
"Ignoring -b; you may not specify a destination array and -b together\n";
$in_place_modify = 0;
}
if ($source_stream) {
warn
"Ignoring -b; you may not specify a source array and -b together\n";
$in_place_modify = 0;
}
if ( $rOpts->{'outfile'} ) {
warn "Ignoring -b; you may not use -b and -o together\n";
$in_place_modify = 0;
}
if ( defined( $rOpts->{'output-path'} ) ) {
warn "Ignoring -b; you may not use -b and -opath together\n";
$in_place_modify = 0;
}
}
Perl::Tidy::Formatter::check_options($rOpts);
if ( $rOpts->{'format'} eq 'html' ) {
Perl::Tidy::HtmlWriter->check_options($rOpts);
}
# make the pattern of file extensions that we shouldn't touch
my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
if ($output_extension) {
my $ext = quotemeta($output_extension);
$forbidden_file_extensions .= "|$ext";
}
if ( $in_place_modify && $backup_extension ) {
my $ext = quotemeta($backup_extension);
$forbidden_file_extensions .= "|$ext";
}
$forbidden_file_extensions .= ')$';
# Create a diagnostics object if requested;
# This is only useful for code development
my $diagnostics_object = undef;
if ( $rOpts->{'DIAGNOSTICS'} ) {
$diagnostics_object = Perl::Tidy::Diagnostics->new();
}
# no filenames should be given if input is from an array
if ($source_stream) {
if ( @ARGV > 0 ) {
die
"You may not specify any filenames when a source array is given\n";
}
# we'll stuff the source array into ARGV
unshift( @ARGV, $source_stream );
# No special treatment for source stream which is a filename.
# This will enable checks for binary files and other bad stuff.
$source_stream = undef unless ref($source_stream);
}
# use stdin by default if no source array and no args
else {
unshift( @ARGV, '-' ) unless @ARGV;
}
# loop to process all files in argument list
my $number_of_files = @ARGV;
my $formatter = undef;
$tokenizer = undef;
while ( $input_file = shift @ARGV ) {
my $fileroot;
my $input_file_permissions;
#---------------------------------------------------------------
# determine the input file name
#---------------------------------------------------------------
if ($source_stream) {
$fileroot = "perltidy";
}
elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
$fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
$in_place_modify = 0;
}
else {
$fileroot = $input_file;
unless ( -e $input_file ) {
# file doesn't exist - check for a file glob
if ( $input_file =~ /([\?\*\[\{])/ ) {
# Windows shell may not remove quotes, so do it
my $input_file = $input_file;
if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
my $pattern = fileglob_to_re($input_file);
eval "/$pattern/";
if ( !$@ && opendir( DIR, './' ) ) {
my @files =
grep { /$pattern/ && !-d $_ } readdir(DIR);
closedir(DIR);
if (@files) {
unshift @ARGV, @files;
next;
}
}
}
print "skipping file: '$input_file': no matches found\n";
next;
}
unless ( -f $input_file ) {
print "skipping file: $input_file: not a regular file\n";
next;
}
unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
print
"skipping file: $input_file: Non-text (override with -f)\n";
next;
}
# we should have a valid filename now
$fileroot = $input_file;
$input_file_permissions = ( stat $input_file )[2] & 07777;
if ( $^O eq 'VMS' ) {
( $fileroot, $dot ) = check_vms_filename($fileroot);
}
# add option to change path here
if ( defined( $rOpts->{'output-path'} ) ) {
my ( $base, $old_path ) = fileparse($fileroot);
my $new_path = $rOpts->{'output-path'};
unless ( -d $new_path ) {
unless ( mkdir $new_path, 0777 ) {
die "unable to create directory $new_path: $!\n";
}
}
my $path = $new_path;
$fileroot = catfile( $path, $base );
unless ($fileroot) {
die <<EOM;
------------------------------------------------------------------------
Problem combining $new_path and $base to make a filename; check -opath
------------------------------------------------------------------------
EOM
}
}
}
# Skip files with same extension as the output files because
# this can lead to a messy situation with files like
# script.tdy.tdy.tdy ... or worse problems ... when you
# rerun perltidy over and over with wildcard input.
if (
!$source_stream
&& ( $input_file =~ /$forbidden_file_extensions/o
|| $input_file eq 'DIAGNOSTICS' )
)
{
print "skipping file: $input_file: wrong extension\n";
next;
}
# the 'source_object' supplies a method to read the input file
my $source_object =
Perl::Tidy::LineSource->new( $input_file, $rOpts,
$rpending_logfile_message );
next unless ($source_object);
# register this file name with the Diagnostics package
$diagnostics_object->set_input_file($input_file)
if $diagnostics_object;
#---------------------------------------------------------------
# determine the output file name
#---------------------------------------------------------------
my $output_file = undef;
my $actual_output_extension;
if ( $rOpts->{'outfile'} ) {
if ( $number_of_files <= 1 ) {
if ( $rOpts->{'standard-output'} ) {
die "You may not use -o and -st together\n";
}
elsif ($destination_stream) {
die
"You may not specify a destination array and -o together\n";
}
elsif ( defined( $rOpts->{'output-path'} ) ) {
die "You may not specify -o and -opath together\n";
}
elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
die "You may not specify -o and -oext together\n";
}
$output_file = $rOpts->{outfile};
# make sure user gives a file name after -o
if ( $output_file =~ /^-/ ) {
die "You must specify a valid filename after -o\n";
}
# do not overwrite input file with -o
if ( defined($input_file_permissions)
&& ( $output_file eq $input_file ) )
{
die
"Use 'perltidy -b $input_file' to modify in-place\n";
}
}
else {
die "You may not use -o with more than one input file\n";
}
}
elsif ( $rOpts->{'standard-output'} ) {
if ($destination_stream) {
die
"You may not specify a destination array and -st together\n";
}
$output_file = '-';
if ( $number_of_files <= 1 ) {
}
else {
die "You may not use -st with more than one input file\n";
}
}
elsif ($destination_stream) {
$output_file = $destination_stream;
}
elsif ($source_stream) { # source but no destination goes to stdout
$output_file = '-';
}
elsif ( $input_file eq '-' ) {
$output_file = '-';
}
else {
if ($in_place_modify) {
$output_file = IO::File->new_tmpfile()
or die "cannot open temp file for -b option: $!\n";
}
else {
$actual_output_extension = $output_extension;
$output_file = $fileroot . $output_extension;
}
}
# the 'sink_object' knows how to write the output file
my $tee_file = $fileroot . $dot . "TEE";
my $line_separator = $rOpts->{'output-line-ending'};
if ( $rOpts->{'preserve-line-endings'} ) {
$line_separator = find_input_line_ending($input_file);
}
# Eventually all I/O may be done with binmode, but for now it is
# only done when a user requests a particular line separator
# through the -ple or -ole flags
my $binmode = 0;
if ( defined($line_separator) ) { $binmode = 1 }
else { $line_separator = "\n" }
my $sink_object =
Perl::Tidy::LineSink->new( $output_file, $tee_file,
$line_separator, $rOpts, $rpending_logfile_message, $binmode );
#---------------------------------------------------------------
# initialize the error logger
#---------------------------------------------------------------
my $warning_file = $fileroot . $dot . "ERR";
if ($errorfile_stream) { $warning_file = $errorfile_stream }
my $log_file = $fileroot . $dot . "LOG";
if ($logfile_stream) { $log_file = $logfile_stream }
my $logger_object =
Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
$saw_extrude );
write_logfile_header(
$rOpts, $logger_object, $config_file,
$rraw_options, $Windows_type, $readable_options,
);
if ($$rpending_logfile_message) {
$logger_object->write_logfile_entry($$rpending_logfile_message);
}
if ($$rpending_complaint) {
$logger_object->complain($$rpending_complaint);
}
#---------------------------------------------------------------
# initialize the debug object, if any
#---------------------------------------------------------------
my $debugger_object = undef;
if ( $rOpts->{DEBUG} ) {
$debugger_object =
Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
}
#---------------------------------------------------------------
# create a formatter for this file : html writer or pretty printer
#---------------------------------------------------------------
# we have to delete any old formatter because, for safety,
# the formatter will check to see that there is only one.
$formatter = undef;
if ($user_formatter) {
$formatter = $user_formatter;
}
elsif ( $rOpts->{'format'} eq 'html' ) {
$formatter =
Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
$actual_output_extension, $html_toc_extension,
$html_src_extension );
}
elsif ( $rOpts->{'format'} eq 'tidy' ) {
$formatter = Perl::Tidy::Formatter->new(
logger_object => $logger_object,
diagnostics_object => $diagnostics_object,
sink_object => $sink_object,
);
}
else {
die "I don't know how to do -format=$rOpts->{'format'}\n";
}
unless ($formatter) {
die "Unable to continue with $rOpts->{'format'} formatting\n";
}
#---------------------------------------------------------------
# create the tokenizer for this file
#---------------------------------------------------------------
$tokenizer = undef; # must destroy old tokenizer
$tokenizer = Perl::Tidy::Tokenizer->new(
source_object => $source_object,
logger_object => $logger_object,
debugger_object => $debugger_object,
diagnostics_object => $diagnostics_object,
starting_level => $rOpts->{'starting-indentation-level'},
tabs => $rOpts->{'tabs'},
indent_columns => $rOpts->{'indent-columns'},
look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
look_for_autoloader => $rOpts->{'look-for-autoloader'},
look_for_selfloader => $rOpts->{'look-for-selfloader'},
trim_qw => $rOpts->{'trim-qw'},
);
#---------------------------------------------------------------
# now we can do it
#---------------------------------------------------------------
process_this_file( $tokenizer, $formatter );
#---------------------------------------------------------------
# close the input source and report errors
#---------------------------------------------------------------
$source_object->close_input_file();
# get file names to use for syntax check
my $ifname = $source_object->get_input_file_copy_name();
my $ofname = $sink_object->get_output_file_copy();
#---------------------------------------------------------------
# handle the -b option (backup and modify in-place)
#---------------------------------------------------------------
if ($in_place_modify) {
unless ( -f $input_file ) {
# oh, oh, no real file to backup ..
# shouldn't happen because of numerous preliminary checks
die print
"problem with -b backing up input file '$input_file': not a file\n";
}
my $backup_name = $input_file . $backup_extension;
if ( -f $backup_name ) {
unlink($backup_name)
or die
"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
}
rename( $input_file, $backup_name )
or die
"problem renaming $input_file to $backup_name for -b option: $!\n";
$ifname = $backup_name;
seek( $output_file, 0, 0 )
or die "unable to rewind tmp file for -b option: $!\n";
my $fout = IO::File->new("> $input_file")
or die
"problem opening $input_file for write for -b option; check directory permissions: $!\n";
binmode $fout;
my $line;
while ( $line = $output_file->getline() ) {
$fout->print($line);
}
$fout->close();
$output_file = $input_file;
$ofname = $input_file;
}
$sink_object->close_output_file() if $sink_object;
$debugger_object->close_debug_file() if $debugger_object;
my $infile_syntax_ok = 0; if ($output_file) {
if ($input_file_permissions) {
if ( $rOpts->{'format'} eq 'tidy' ) {
chmod( $input_file_permissions | 0600, $output_file );
}
}
if ( $logger_object && $rOpts->{'check-syntax'} ) {
$infile_syntax_ok =
check_syntax( $ifname, $ofname, $logger_object, $rOpts );
}
}
$logger_object->finish( $infile_syntax_ok, $formatter )
if $logger_object;
} } }
sub fileglob_to_re {
my $x = shift;
$x =~ s $x =~ s $x =~ s "^$x\\z"; }
sub make_extension {
my ( $extension, $default, $dot ) = @_;
$extension = $default unless ($extension);
if ( $extension =~ /^[a-zA-Z0-9]/ ) {
$extension = $dot . $extension;
}
return $extension;
}
sub write_logfile_header {
my (
$rOpts, $logger_object, $config_file,
$rraw_options, $Windows_type, $readable_options
) = @_;
$logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
);
if ($Windows_type) {
$logger_object->write_logfile_entry("Windows type is $Windows_type\n");
}
my $options_string = join( ' ', @$rraw_options );
if ($config_file) {
$logger_object->write_logfile_entry(
"Found Configuration File >>> $config_file \n");
}
$logger_object->write_logfile_entry(
"Configuration and command line parameters for this run:\n");
$logger_object->write_logfile_entry("$options_string\n");
if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
$rOpts->{'logfile'} = 1; $logger_object->write_logfile_entry(
"Final parameter set for this run\n");
$logger_object->write_logfile_entry(
"------------------------------------\n");
$logger_object->write_logfile_entry($readable_options);
$logger_object->write_logfile_entry(
"------------------------------------\n");
}
$logger_object->write_logfile_entry(
"To find error messages search for 'WARNING' with your editor\n");
}
sub generate_options {
my @option_string = ();
my %expansion = ();
my %option_category = ();
my %option_range = ();
my $rexpansion = \%expansion;
my @category_name = (
'0. I/O control',
'1. Basic formatting options',
'2. Code indentation control',
'3. Whitespace control',
'4. Comment controls',
'5. Linebreak controls',
'6. Controlling list formatting',
'7. Retaining or ignoring existing line breaks',
'8. Blank line control',
'9. Other controls',
'10. HTML options',
'11. pod2html options',
'12. Controlling HTML properties',
'13. Debugging',
);
@option_string = qw(
html!
noprofile
no-profile
npro
recombine!
valign!
);
my $category = 13; foreach (@option_string) {
my $opt = $_; $opt =~ s/!$//;
$option_category{$opt} = $category_name[$category];
}
$category = 11; $option_category{html} = $category_name[$category];
my $add_option = sub {
my ( $long_name, $short_name, $flag ) = @_;
push @option_string, $long_name . $flag;
$option_category{$long_name} = $category_name[$category];
if ($short_name) {
if ( $expansion{$short_name} ) {
my $existing_name = $expansion{$short_name}[0];
die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
}
$expansion{$short_name} = [$long_name];
if ( $flag eq '!' ) {
my $nshort_name = 'n' . $short_name;
my $nolong_name = 'no' . $long_name;
if ( $expansion{$nshort_name} ) {
my $existing_name = $expansion{$nshort_name}[0];
die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
}
$expansion{$nshort_name} = [$nolong_name];
}
}
};
$category = 0; $add_option->( 'backup-and-modify-in-place', 'b', '!' );
$add_option->( 'backup-file-extension', 'bext', '=s' );
$add_option->( 'force-read-binary', 'f', '!' );
$add_option->( 'format', 'fmt', '=s' );
$add_option->( 'logfile', 'log', '!' );
$add_option->( 'logfile-gap', 'g', ':i' );
$add_option->( 'outfile', 'o', '=s' );
$add_option->( 'output-file-extension', 'oext', '=s' );
$add_option->( 'output-path', 'opath', '=s' );
$add_option->( 'profile', 'pro', '=s' );
$add_option->( 'quiet', 'q', '!' );
$add_option->( 'standard-error-output', 'se', '!' );
$add_option->( 'standard-output', 'st', '!' );
$add_option->( 'warning-output', 'w', '!' );
$add_option->( 'output-line-ending', 'ole', '=s' );
$add_option->( 'starting-indentation-level', 'sil', '=i' );
$category = 1; $add_option->( 'check-syntax', 'syn', '!' );
$add_option->( 'entab-leading-whitespace', 'et', '=i' );
$add_option->( 'indent-columns', 'i', '=i' );
$add_option->( 'maximum-line-length', 'l', '=i' );
$add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
$add_option->( 'preserve-line-endings', 'ple', '!' );
$add_option->( 'tabs', 't', '!' );
$category = 2; $add_option->( 'continuation-indentation', 'ci', '=i' );
$add_option->( 'line-up-parentheses', 'lp', '!' );
$add_option->( 'outdent-keyword-list', 'okwl', '=s' );
$add_option->( 'outdent-keywords', 'okw', '!' );
$add_option->( 'outdent-labels', 'ola', '!' );
$add_option->( 'outdent-long-quotes', 'olq', '!' );
$add_option->( 'indent-closing-brace', 'icb', '!' );
$add_option->( 'closing-token-indentation', 'cti', '=i' );
$add_option->( 'closing-paren-indentation', 'cpi', '=i' );
$add_option->( 'closing-brace-indentation', 'cbi', '=i' );
$add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
$add_option->( 'brace-left-and-indent', 'bli', '!' );
$add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
$category = 3; $add_option->( 'add-semicolons', 'asc', '!' );
$add_option->( 'add-whitespace', 'aws', '!' );
$add_option->( 'block-brace-tightness', 'bbt', '=i' );
$add_option->( 'brace-tightness', 'bt', '=i' );
$add_option->( 'delete-old-whitespace', 'dws', '!' );
$add_option->( 'delete-semicolons', 'dsm', '!' );
$add_option->( 'nospace-after-keyword', 'nsak', '=s' );
$add_option->( 'nowant-left-space', 'nwls', '=s' );
$add_option->( 'nowant-right-space', 'nwrs', '=s' );
$add_option->( 'paren-tightness', 'pt', '=i' );
$add_option->( 'space-after-keyword', 'sak', '=s' );
$add_option->( 'space-for-semicolon', 'sfs', '!' );
$add_option->( 'space-function-paren', 'sfp', '!' );
$add_option->( 'space-keyword-paren', 'skp', '!' );
$add_option->( 'space-terminal-semicolon', 'sts', '!' );
$add_option->( 'square-bracket-tightness', 'sbt', '=i' );
$add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
$add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
$add_option->( 'trim-qw', 'tqw', '!' );
$add_option->( 'want-left-space', 'wls', '=s' );
$add_option->( 'want-right-space', 'wrs', '=s' );
$category = 4; $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
$add_option->( 'closing-side-comment-interval', 'csci', '=i' );
$add_option->( 'closing-side-comment-list', 'cscl', '=s' );
$add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
$add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
$add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
$add_option->( 'closing-side-comments', 'csc', '!' );
$add_option->( 'format-skipping', 'fs', '!' );
$add_option->( 'format-skipping-begin', 'fsb', '=s' );
$add_option->( 'format-skipping-end', 'fse', '=s' );
$add_option->( 'hanging-side-comments', 'hsc', '!' );
$add_option->( 'indent-block-comments', 'ibc', '!' );
$add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
$add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
$add_option->( 'minimum-space-to-comment', 'msc', '=i' );
$add_option->( 'outdent-long-comments', 'olc', '!' );
$add_option->( 'outdent-static-block-comments', 'osbc', '!' );
$add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
$add_option->( 'static-block-comments', 'sbc', '!' );
$add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
$add_option->( 'static-side-comments', 'ssc', '!' );
$category = 5; $add_option->( 'add-newlines', 'anl', '!' );
$add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
$add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
$add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
$add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
$add_option->( 'cuddled-else', 'ce', '!' );
$add_option->( 'delete-old-newlines', 'dnl', '!' );
$add_option->( 'opening-brace-always-on-right', 'bar', '!' );
$add_option->( 'opening-brace-on-new-line', 'bl', '!' );
$add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
$add_option->( 'opening-paren-right', 'opr', '!' );
$add_option->( 'opening-square-bracket-right', 'osbr', '!' );
$add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
$add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
$add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
$add_option->( 'stack-closing-hash-brace', 'schb', '!' );
$add_option->( 'stack-closing-paren', 'scp', '!' );
$add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
$add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
$add_option->( 'stack-opening-paren', 'sop', '!' );
$add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
$add_option->( 'vertical-tightness', 'vt', '=i' );
$add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
$add_option->( 'want-break-after', 'wba', '=s' );
$add_option->( 'want-break-before', 'wbb', '=s' );
$add_option->( 'break-after-all-operators', 'baao', '!' );
$add_option->( 'break-before-all-operators', 'bbao', '!' );
$add_option->( 'keep-interior-semicolons', 'kis', '!' );
$category = 6; $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
$add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
$add_option->( 'maximum-fields-per-table', 'mft', '=i' );
$category = 7; $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
$add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
$add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
$add_option->( 'ignore-old-breakpoints', 'iob', '!' );
$category = 8; $add_option->( 'blanks-before-blocks', 'bbb', '!' );
$add_option->( 'blanks-before-comments', 'bbc', '!' );
$add_option->( 'blanks-before-subs', 'bbs', '!' );
$add_option->( 'long-block-line-count', 'lbl', '=i' );
$add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
$add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
$category = 9; $add_option->( 'delete-block-comments', 'dbc', '!' );
$add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
$add_option->( 'delete-pod', 'dp', '!' );
$add_option->( 'delete-side-comments', 'dsc', '!' );
$add_option->( 'tee-block-comments', 'tbc', '!' );
$add_option->( 'tee-pod', 'tp', '!' );
$add_option->( 'tee-side-comments', 'tsc', '!' );
$add_option->( 'look-for-autoloader', 'lal', '!' );
$add_option->( 'look-for-hash-bang', 'x', '!' );
$add_option->( 'look-for-selfloader', 'lsl', '!' );
$add_option->( 'pass-version-line', 'pvl', '!' );
$category = 13; $add_option->( 'DEBUG', 'D', '!' );
$add_option->( 'DIAGNOSTICS', 'I', '!' );
$add_option->( 'check-multiline-quotes', 'chk', '!' );
$add_option->( 'dump-defaults', 'ddf', '!' );
$add_option->( 'dump-long-names', 'dln', '!' );
$add_option->( 'dump-options', 'dop', '!' );
$add_option->( 'dump-profile', 'dpro', '!' );
$add_option->( 'dump-short-names', 'dsn', '!' );
$add_option->( 'dump-token-types', 'dtt', '!' );
$add_option->( 'dump-want-left-space', 'dwls', '!' );
$add_option->( 'dump-want-right-space', 'dwrs', '!' );
$add_option->( 'fuzzy-line-length', 'fll', '!' );
$add_option->( 'help', 'h', '' );
$add_option->( 'short-concatenation-item-length', 'scl', '=i' );
$add_option->( 'show-options', 'opt', '!' );
$add_option->( 'version', 'v', '' );
Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
$category = 12; foreach my $opt (@option_string) {
my $long_name = $opt;
$long_name =~ s/(!|=.*|:.*)$//;
unless ( defined( $option_category{$long_name} ) ) {
if ( $long_name =~ /^html-linked/ ) {
$category = 10; }
elsif ( $long_name =~ /^pod2html/ ) {
$category = 11; }
$option_category{$long_name} = $category_name[$category];
}
}
%option_range = (
'format' => [ 'tidy', 'html', 'user' ],
'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
'block-brace-tightness' => [ 0, 2 ],
'brace-tightness' => [ 0, 2 ],
'paren-tightness' => [ 0, 2 ],
'square-bracket-tightness' => [ 0, 2 ],
'block-brace-vertical-tightness' => [ 0, 2 ],
'brace-vertical-tightness' => [ 0, 2 ],
'brace-vertical-tightness-closing' => [ 0, 2 ],
'paren-vertical-tightness' => [ 0, 2 ],
'paren-vertical-tightness-closing' => [ 0, 2 ],
'square-bracket-vertical-tightness' => [ 0, 2 ],
'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
'vertical-tightness' => [ 0, 2 ],
'vertical-tightness-closing' => [ 0, 2 ],
'closing-brace-indentation' => [ 0, 3 ],
'closing-paren-indentation' => [ 0, 3 ],
'closing-square-bracket-indentation' => [ 0, 3 ],
'closing-token-indentation' => [ 0, 3 ],
'closing-side-comment-else-flag' => [ 0, 2 ],
'comma-arrow-breakpoints' => [ 0, 3 ],
);
my @defaults = qw(
add-newlines
add-semicolons
add-whitespace
blanks-before-blocks
blanks-before-comments
blanks-before-subs
block-brace-tightness=0
block-brace-vertical-tightness=0
brace-tightness=1
brace-vertical-tightness-closing=0
brace-vertical-tightness=0
break-at-old-logical-breakpoints
break-at-old-ternary-breakpoints
break-at-old-keyword-breakpoints
comma-arrow-breakpoints=1
nocheck-syntax
closing-side-comment-interval=6
closing-side-comment-maximum-text=20
closing-side-comment-else-flag=0
closing-paren-indentation=0
closing-brace-indentation=0
closing-square-bracket-indentation=0
continuation-indentation=2
delete-old-newlines
delete-semicolons
fuzzy-line-length
hanging-side-comments
indent-block-comments
indent-columns=4
long-block-line-count=8
look-for-autoloader
look-for-selfloader
maximum-consecutive-blank-lines=1
maximum-fields-per-table=0
maximum-line-length=80
minimum-space-to-comment=4
nobrace-left-and-indent
nocuddled-else
nodelete-old-whitespace
nohtml
nologfile
noquiet
noshow-options
nostatic-side-comments
noswallow-optional-blank-lines
notabs
nowarning-output
outdent-labels
outdent-long-quotes
outdent-long-comments
paren-tightness=1
paren-vertical-tightness-closing=0
paren-vertical-tightness=0
pass-version-line
recombine
valign
short-concatenation-item-length=8
space-for-semicolon
square-bracket-tightness=1
square-bracket-vertical-tightness-closing=0
square-bracket-vertical-tightness=0
static-block-comments
trim-qw
format=tidy
backup-file-extension=bak
format-skipping
pod2html
html-table-of-contents
html-entities
);
push @defaults, "perl-syntax-check-flags=-c -T";
%expansion = (
%expansion,
'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
'fnl' => [qw(freeze-newlines)],
'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
'fws' => [qw(freeze-whitespace)],
'indent-only' => [qw(freeze-newlines freeze-whitespace)],
'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
'nooutdent-long-lines' =>
[qw(nooutdent-long-quotes nooutdent-long-comments)],
'noll' => [qw(nooutdent-long-lines)],
'io' => [qw(indent-only)],
'delete-all-comments' =>
[qw(delete-block-comments delete-side-comments delete-pod)],
'nodelete-all-comments' =>
[qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
'dac' => [qw(delete-all-comments)],
'ndac' => [qw(nodelete-all-comments)],
'gnu' => [qw(gnu-style)],
'pbp' => [qw(perl-best-practices)],
'tee-all-comments' =>
[qw(tee-block-comments tee-side-comments tee-pod)],
'notee-all-comments' =>
[qw(notee-block-comments notee-side-comments notee-pod)],
'tac' => [qw(tee-all-comments)],
'ntac' => [qw(notee-all-comments)],
'html' => [qw(format=html)],
'nhtml' => [qw(format=tidy)],
'tidy' => [qw(format=tidy)],
'break-after-comma-arrows' => [qw(cab=0)],
'nobreak-after-comma-arrows' => [qw(cab=1)],
'baa' => [qw(cab=0)],
'nbaa' => [qw(cab=1)],
'break-at-old-trinary-breakpoints' => [qw(bot)],
'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
'icp' => [qw(cpi=2 cbi=2 csbi=2)],
'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
'otr' => [qw(opr ohbr osbr)],
'opening-token-right' => [qw(opr ohbr osbr)],
'notr' => [qw(nopr nohbr nosbr)],
'noopening-token-right' => [qw(nopr nohbr nosbr)],
'sot' => [qw(sop sohb sosb)],
'nsot' => [qw(nsop nsohb nsosb)],
'stack-opening-tokens' => [qw(sop sohb sosb)],
'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
'sct' => [qw(scp schb scsb)],
'stack-closing-tokens' => => [qw(scp schb scsb)],
'nsct' => [qw(nscp nschb nscsb)],
'nostack-opening-tokens' => [qw(nscp nschb nscsb)],
'mangle' => [
qw(
check-syntax
delete-old-newlines
delete-old-whitespace
delete-semicolons
indent-columns=0
maximum-consecutive-blank-lines=0
maximum-line-length=100000
noadd-newlines
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
noblanks-before-subs
notabs
)
],
'extrude' => [
qw(
check-syntax
ci=0
delete-old-newlines
delete-old-whitespace
delete-semicolons
indent-columns=0
maximum-consecutive-blank-lines=0
maximum-line-length=1
noadd-semicolons
noadd-whitespace
noblanks-before-blocks
noblanks-before-subs
nofuzzy-line-length
notabs
norecombine
)
],
'gnu-style' => [
qw(
lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
)
],
'perl-best-practices' => [
qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
],
);
Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
return (
\@option_string, \@defaults, \%expansion,
\%option_category, \%option_range
);
}
sub process_command_line {
my (
$perltidyrc_stream, $is_Windows, $Windows_type,
$rpending_complaint, $dump_options_type
) = @_;
use Getopt::Long;
my (
$roption_string, $rdefaults, $rexpansion,
$roption_category, $roption_range
) = generate_options();
my %Opts = ();
{
local @ARGV;
my $i;
unless ( $dump_options_type eq 'perltidyrc' ) {
for $i (@$rdefaults) { push @ARGV, "--" . $i }
}
my $glc;
eval { $glc = Getopt::Long::Configure() };
unless ($@) {
eval { Getopt::Long::ConfigDefaults() };
}
else { $glc = undef }
if ( !GetOptions( \%Opts, @$roption_string ) ) {
die "Programming Bug: error in setting default options";
}
eval { Getopt::Long::Configure($glc) } if defined $glc;
}
my $word;
my @raw_options = ();
my $config_file = "";
my $saw_ignore_profile = 0;
my $saw_extrude = 0;
my $saw_dump_profile = 0;
my $i;
foreach $i (@ARGV) {
$i =~ s/^--/-/;
if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
$saw_ignore_profile = 1;
}
elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
$saw_dump_profile = 1;
}
elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
if ($config_file) {
warn
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
}
$config_file = $2;
unless ( -e $config_file ) {
warn "cannot find file given with -pro=$config_file: $!\n";
$config_file = "";
}
}
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
die "usage: -pro=filename or --profile=filename, no spaces\n";
}
elsif ( $i =~ /^-extrude$/ ) {
$saw_extrude = 1;
}
elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
usage();
exit 1;
}
elsif ( $i =~ /^-(version|v)$/ ) {
show_version();
exit 1;
}
elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
dump_defaults(@$rdefaults);
exit 1;
}
elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
dump_long_names(@$roption_string);
exit 1;
}
elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
dump_short_names($rexpansion);
exit 1;
}
elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
exit 1;
}
}
if ( $saw_dump_profile && $saw_ignore_profile ) {
warn "No profile to dump because of -npro\n";
exit 1;
}
unless ($saw_ignore_profile) {
if ($perltidyrc_stream) {
if ($config_file) {
warn <<EOM;
Conflict: a perltidyrc configuration file was specified both as this
perltidy call parameter: $perltidyrc_stream
and with this -profile=$config_file.
Using -profile=$config_file.
EOM
}
else {
$config_file = $perltidyrc_stream;
}
}
my $rconfig_file_chatter;
$$rconfig_file_chatter = "";
$config_file =
find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint )
unless $config_file;
my $fh_config;
if ($config_file) {
( $fh_config, $config_file ) =
Perl::Tidy::streamhandle( $config_file, 'r' );
unless ($fh_config) {
$$rconfig_file_chatter .=
"# $config_file exists but cannot be opened\n";
}
}
if ($saw_dump_profile) {
if ($saw_dump_profile) {
dump_config_file( $fh_config, $config_file,
$rconfig_file_chatter );
exit 1;
}
}
if ($fh_config) {
my ( $rconfig_list, $death_message ) =
read_config_file( $fh_config, $config_file, $rexpansion );
die $death_message if ($death_message);
if (@$rconfig_list) {
local @ARGV = @$rconfig_list;
expand_command_abbreviations( $rexpansion, \@raw_options,
$config_file );
if ( !GetOptions( \%Opts, @$roption_string ) ) {
die
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
}
if (@ARGV) {
my $count = @ARGV;
my $str = "\'" . pop(@ARGV) . "\'";
while ( my $param = pop(@ARGV) ) {
if ( length($str) < 70 ) {
$str .= ", '$param'";
}
else {
$str .= ", ...";
last;
}
}
die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters. Use -npro to ignore this file.
EOM
}
foreach (
qw{
dump-defaults
dump-long-names
dump-options
dump-profile
dump-short-names
dump-token-types
dump-want-left-space
dump-want-right-space
help
stylesheet
version
}
)
{
if ( defined( $Opts{$_} ) ) {
delete $Opts{$_};
warn "ignoring --$_ in config file: $config_file\n";
}
}
}
}
}
expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
if ( !GetOptions( \%Opts, @$roption_string ) ) {
die "Error on command line; for help try 'perltidy -h'\n";
}
return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
$rexpansion, $roption_category, $roption_range );
}
sub check_options {
my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
if ( defined $rOpts->{'vertical-tightness'} ) {
my $vt = $rOpts->{'vertical-tightness'};
$rOpts->{'paren-vertical-tightness'} = $vt;
$rOpts->{'square-bracket-vertical-tightness'} = $vt;
$rOpts->{'brace-vertical-tightness'} = $vt;
}
if ( defined $rOpts->{'vertical-tightness-closing'} ) {
my $vtc = $rOpts->{'vertical-tightness-closing'};
$rOpts->{'paren-vertical-tightness-closing'} = $vtc;
$rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
$rOpts->{'brace-vertical-tightness-closing'} = $vtc;
}
if ( defined $rOpts->{'closing-token-indentation'} ) {
my $cti = $rOpts->{'closing-token-indentation'};
$rOpts->{'closing-square-bracket-indentation'} = $cti;
$rOpts->{'closing-brace-indentation'} = $cti;
$rOpts->{'closing-paren-indentation'} = $cti;
}
if ( $rOpts->{'quiet'} ) {
$rOpts->{'check-syntax'} = 0;
}
if ( $rOpts->{'format'} ne 'tidy' ) {
$rOpts->{'check-syntax'} = 0;
}
if ( $rOpts->{'check-syntax'}
&& $is_Windows
&& ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
{
$rOpts->{'check-syntax'} = 0;
}
unless ($is_Windows) {
if ( $< == 0 && $rOpts->{'check-syntax'} ) {
$rOpts->{'check-syntax'} = 0;
$$rpending_complaint .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
}
}
if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
if ( $rOpts->{'logfile-gap'} == 0 ) {
$rOpts->{'logfile-gap'} = 1;
}
$rOpts->{'logfile'} = 1;
}
else {
$rOpts->{'logfile-gap'} = 50;
}
if ( !$rOpts->{'add-whitespace'}
&& !$rOpts->{'delete-old-whitespace'}
&& !$rOpts->{'add-newlines'}
&& !$rOpts->{'delete-old-newlines'} )
{
$rOpts->{'indent-only'} = 1;
}
if ( $rOpts->{'indent-spaced-block-comments'} ) {
$rOpts->{'indent-block-comments'} = 1;
}
if ( $rOpts->{'brace-left-and-indent'} ) {
$rOpts->{'opening-brace-on-new-line'} = 1;
}
if ( $rOpts->{'opening-brace-always-on-right'}
&& $rOpts->{'opening-brace-on-new-line'} )
{
warn <<EOM;
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
'opening-brace-on-new-line' (-bl). Ignoring -bl.
EOM
$rOpts->{'opening-brace-on-new-line'} = 0;
}
if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
$rOpts->{'opening-brace-on-new-line'} = 0;
}
if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
$rOpts->{'opening-sub-brace-on-new-line'} =
$rOpts->{'opening-brace-on-new-line'};
}
unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
$rOpts->{'swallow-optional-blank-lines'} = 1;
}
if ( $rOpts->{'entab-leading-whitespace'} ) {
if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
warn "-et=n must use a positive integer; ignoring -et\n";
$rOpts->{'entab-leading-whitespace'} = undef;
}
if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
}
}
sub expand_command_abbreviations {
my ( $rexpansion, $rraw_options, $config_file ) = @_;
my ($word);
my $max_passes = 10;
my @new_argv = ();
for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
my @new_argv = ();
my $abbrev_count = 0;
foreach $word (@ARGV) {
if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
my $abr = $1;
my $flags = $2;
if ( $pass_count == 0 ) {
push( @$rraw_options, $word );
}
if ( $rexpansion->{ $abr . $flags } ) {
$abr = $abr . $flags;
$flags = "";
}
if ( $rexpansion->{$abr} ) {
$abbrev_count++;
foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
next unless $abbrev; push( @new_argv, '--' . $abbrev . $flags );
}
}
else {
push( @new_argv, $word );
}
}
else {
push( @new_argv, $word );
}
}
@ARGV = @new_argv;
last unless ( $abbrev_count > 0 );
if ( $pass_count == $max_passes ) {
print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
print STDERR "Here are the raw options\n";
local $" = ')(';
print STDERR "(@$rraw_options)\n";
my $num = @new_argv;
if ( $num < 50 ) {
print STDERR "After $max_passes passes here is ARGV\n";
print STDERR "(@new_argv)\n";
}
else {
print STDERR "After $max_passes passes ARGV has $num entries\n";
}
if ($config_file) {
die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
}
else {
die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
}
} # end of check for circular references
} # end of loop over all passes
}
# Debug routine -- this will dump the expansion hash
sub dump_short_names {
my $rexpansion = shift;
print STDOUT <<EOM;
List of short names. This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
EOM
foreach my $abbrev ( sort keys %$rexpansion ) {
my @list = @{ $$rexpansion{$abbrev} };
print STDOUT "$abbrev --> @list\n";
}
}
sub check_vms_filename {
# given a valid filename (the perltidy input file)
# create a modified filename and separator character
# suitable for VMS.
#
# Contributed by Michael Cartmell
#
my ( $base, $path ) = fileparse( $_[0] );
# remove explicit ; version
$base =~ s/;-?\d*$//
# remove explicit . version ie two dots in filename NB ^ escapes a dot
or $base =~ s/( # begin capture $1
(?:^|[^^])\. # match a dot not preceded by a caret
(?: # followed by nothing
| # or
.*[^^] # anything ending in a non caret
)
) # end capture $1
\.-?\d*$ # match . version number
/$1/x;
# normalise filename, if there are no unescaped dots then append one
$base .= '.' unless $base =~ /(?:^|[^^])\./;
# if we don't already have an extension then we just append the extention
my $separator = ( $base =~ /\.$/ ) ? "" : "_";
return ( $path . $base, $separator );
}
sub Win_OS_Type {
# TODO: are these more standard names?
# Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
# Returns a string that determines what MS OS we are on.
# Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
# Returns blank string if not an MS system.
# Original code contributed by: Yves Orton
# We need to know this to decide where to look for config files
my $rpending_complaint = shift;
my $os = "";
return $os unless $^O =~ /win32|dos/i; # is it a MS box?
# Systems built from Perl source may not have Win32.pm
# But probably have Win32::GetOSVersion() anyway so the
# following line is not 'required':
# return $os unless eval('require Win32');
# Use the standard API call to determine the version
my ( $undef, $major, $minor, $build, $id );
eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
#
# NAME ID MAJOR MINOR
# Windows NT 4 2 4 0
# Windows 2000 2 5 0
# Windows XP 2 5 1
# Windows Server 2003 2 5 2
return "win32s" unless $id; # If id==0 then its a win32s box.
$os = { # Magic numbers from MSDN
# documentation of GetOSVersion
1 => {
0 => "95",
10 => "98",
90 => "Me"
},
2 => {
0 => "2000", # or NT 4, see below
1 => "XP/.Net",
2 => "Win2003",
51 => "NT3.51"
}
}->{$id}->{$minor};
# If $os is undefined, the above code is out of date. Suggested updates
# are welcome.
unless ( defined $os ) {
$os = "";
$$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
}
# Unfortunately the logic used for the various versions isnt so clever..
# so we have to handle an outside case.
return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
}
sub is_unix {
return
( $^O !~ /win32|dos/i )
&& ( $^O ne 'VMS' )
&& ( $^O ne 'OS2' )
&& ( $^O ne 'MacOS' );
}
sub look_for_Windows {
# determine Windows sub-type and location of
# system-wide configuration files
my $rpending_complaint = shift;
my $is_Windows = ( $^O =~ /win32|dos/i );
my $Windows_type = Win_OS_Type($rpending_complaint) if $is_Windows;
return ( $is_Windows, $Windows_type );
}
sub find_config_file {
# look for a .perltidyrc configuration file
my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
$rpending_complaint ) = @_;
$$rconfig_file_chatter .= " if ($is_Windows) {
$$rconfig_file_chatter .= "Windows $Windows_type\n";
}
else {
$$rconfig_file_chatter .= " $^O\n";
}
my $exists_config_file = sub {
my $config_file = shift;
return 0 unless $config_file;
$$rconfig_file_chatter .= "# Testing: $config_file\n";
return -f $config_file;
};
my $config_file;
$config_file = ".perltidyrc";
return $config_file if $exists_config_file->($config_file);
my @envs = qw(PERLTIDY HOME);
push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
foreach my $var (@envs) {
$$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
if ( defined( $ENV{$var} ) ) {
$$rconfig_file_chatter .= " = $ENV{$var}\n";
if ( $var eq 'PERLTIDY' ) {
$config_file = "$ENV{$var}";
return $config_file if $exists_config_file->($config_file);
}
$config_file = catfile( $ENV{$var}, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
}
else {
$$rconfig_file_chatter .= "\n";
}
}
if ($is_Windows) {
if ($Windows_type) {
my ( $os, $system, $allusers ) =
Win_Config_Locs( $rpending_complaint, $Windows_type );
if ($allusers) {
$config_file = catfile( $allusers, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
}
$config_file = catfile( $system, ".perltidyrc" );
return $config_file if $exists_config_file->($config_file);
}
}
elsif ( $^O eq 'OS2' ) {
}
elsif ( $^O eq 'MacOS' ) {
}
elsif ( $^O eq 'VMS' ) {
}
else {
$config_file = "/usr/local/etc/perltidyrc";
return $config_file if $exists_config_file->($config_file);
$config_file = "/etc/perltidyrc";
return $config_file if $exists_config_file->($config_file);
}
return;
}
sub Win_Config_Locs {
my $rpending_complaint = shift;
my $os = (@_) ? shift : Win_OS_Type();
return unless $os;
my $system = "";
my $allusers = "";
if ( $os =~ /9[58]|Me/ ) {
$system = "C:/Windows";
}
elsif ( $os =~ /NT|XP|200?/ ) {
$system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
$allusers =
( $os =~ /NT/ )
? "C:/WinNT/profiles/All Users/"
: "C:/Documents and Settings/All Users/";
}
else {
$$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
return;
}
return wantarray ? ( $os, $system, $allusers ) : $os;
}
sub dump_config_file {
my $fh = shift;
my $config_file = shift;
my $rconfig_file_chatter = shift;
print STDOUT "$$rconfig_file_chatter";
if ($fh) {
print STDOUT "# Dump of file: '$config_file'\n";
while ( my $line = $fh->getline() ) { print STDOUT $line }
eval { $fh->close() };
}
else {
print STDOUT "# ...no config file found\n";
}
}
sub read_config_file {
my ( $fh, $config_file, $rexpansion ) = @_;
my @config_list = ();
my $death_message = "";
my $name = undef;
my $line_no;
while ( my $line = $fh->getline() ) {
$line_no++;
chomp $line;
next if $line =~ /^\s* ( $line, $death_message ) =
strip_comment( $line, $config_file, $line_no );
last if ($death_message);
$line =~ s/^\s*(.*?)\s*$/$1/; next unless $line;
if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
my ( $newname, $body, $curly ) = ( $2, $3, $4 );
if ($newname) {
if ($name) {
$death_message =
"No '}' seen after $name and before $newname in config file $config_file line $.\n";
last;
}
$name = $newname;
if ( ${$rexpansion}{$name} ) {
local $" = ')(';
my @names = sort keys %$rexpansion;
$death_message =
"Here is a list of all installed aliases\n(@names)\n"
. "Attempting to redefine alias ($name) in config file $config_file line $.\n";
last;
}
${$rexpansion}{$name} = [];
}
# now do the body
if ($body) {
my ( $rbody_parts, $msg ) = parse_args($body);
if ($msg) {
$death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
last;
}
if ($name) {
# remove leading dashes if this is an alias
foreach (@$rbody_parts) { s/^\-+//; }
push @{ ${$rexpansion}{$name} }, @$rbody_parts;
}
else {
push( @config_list, @$rbody_parts );
}
}
if ($curly) {
unless ($name) {
$death_message =
"Unexpected '}' seen in config file $config_file line $.\n";
last;
}
$name = undef;
}
}
}
eval { $fh->close() };
return ( \@config_list, $death_message );
}
sub strip_comment {
my ( $instr, $config_file, $line_no ) = @_;
my $msg = "";
# nothing to do if no comments
if ( $instr !~ /#/ ) {
return ( $instr, $msg );
}
# use simple method of no quotes
elsif ( $instr !~ /['"]/ ) {
$instr =~ s/\s*\ return ( $instr, $msg );
}
my $outstr = "";
my $quote_char = "";
while (1) {
if ($quote_char) {
if ( $instr =~ /\G($quote_char)/gc ) {
$quote_char = "";
$outstr .= $1;
}
elsif ( $instr =~ /\G(.)/gc ) {
$outstr .= $1;
}
else {
$msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$quote_char> in this text:
$instr
Please fix this line or use -npro to avoid reading this file
EOM
last;
}
}
else {
if ( $instr =~ /\G([\"\'])/gc ) {
$outstr .= $1;
$quote_char = $1;
}
elsif ( $instr =~ /\G last;
}
elsif ( $instr =~ /\G(.)/gc ) {
$outstr .= $1;
}
else {
last;
}
}
}
return ( $outstr, $msg );
}
sub parse_args {
my ($body) = @_;
my @body_parts = ();
my $quote_char = "";
my $part = "";
my $msg = "";
while (1) {
if ($quote_char) {
if ( $body =~ /\G($quote_char)/gc ) {
$quote_char = "";
}
elsif ( $body =~ /\G(.)/gc ) {
$part .= $1;
}
else {
if ( length($part) ) { push @body_parts, $part; }
$msg = <<EOM;
Did not see ending quote character <$quote_char> in this text:
$body
EOM
last;
}
}
else {
if ( $body =~ /\G([\"\'])/gc ) {
$quote_char = $1;
}
elsif ( $body =~ /\G(\s+)/gc ) {
if ( length($part) ) { push @body_parts, $part; }
$part = "";
}
elsif ( $body =~ /\G(.)/gc ) {
$part .= $1;
}
else {
if ( length($part) ) { push @body_parts, $part; }
last;
}
}
}
return ( \@body_parts, $msg );
}
sub dump_long_names {
my @names = sort @_;
print STDOUT <<EOM;
EOM
foreach (@names) { print STDOUT "$_\n" }
}
sub dump_defaults {
my @defaults = sort @_;
print STDOUT "Default command line options:\n";
foreach (@_) { print STDOUT "$_\n" }
}
sub readable_options {
my ( $rOpts, $roption_string ) = @_;
my %Getopt_flags;
my $rGetopt_flags = \%Getopt_flags;
my $readable_options = "# Final parameter set for this run.\n";
$readable_options .=
"# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
foreach my $opt ( @{$roption_string} ) {
my $flag = "";
if ( $opt =~ /(.*)(!|=.*)$/ ) {
$opt = $1;
$flag = $2;
}
if ( defined( $rOpts->{$opt} ) ) {
$rGetopt_flags->{$opt} = $flag;
}
}
foreach my $key ( sort keys %{$rOpts} ) {
my $flag = $rGetopt_flags->{$key};
my $value = $rOpts->{$key};
my $prefix = '--';
my $suffix = "";
if ($flag) {
if ( $flag =~ /^=/ ) {
if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
$suffix = "=" . $value;
}
elsif ( $flag =~ /^!/ ) {
$prefix .= "no" unless ($value);
}
else {
$readable_options .=
"# ERROR in dump_options: unrecognized flag $flag for $key\n";
}
}
$readable_options .= $prefix . $key . $suffix . "\n";
}
return $readable_options;
}
sub show_version {
print <<"EOM";
This is perltidy, v$VERSION
Copyright 2000-2007, Steve Hancock
Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.
Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
}
sub usage {
print STDOUT <<EOF;
This is perltidy version $VERSION, a perl script indenter. Usage:
perltidy [ options ] file1 file2 file3 ...
(output goes to file1.tdy, file2.tdy, file3.tdy, ...)
perltidy [ options ] file1 -o outfile
perltidy [ options ] file1 -st >outfile
perltidy [ options ] <infile >outfile
Options have short and long forms. Short forms are shown; see
man pages for long forms. Note: '=s' indicates a required string,
and '=n' indicates a required integer.
I/O control
-h show this help
-o=file name of the output file (only if single input file)
-oext=s change output extension from 'tdy' to s
-opath=path change path to be 'path' for output files
-b backup original to .bak and modify file in-place
-bext=s change default backup extension from 'bak' to s
-q deactivate error messages (for running under editor)
-w include non-critical warning messages in the .ERR error output
-syn run perl -c to check syntax (default under unix systems)
-log save .LOG file, which has useful diagnostics
-f force perltidy to read a binary file
-g like -log but writes more detailed .LOG file, for debugging scripts
-opt write the set of options actually used to a .LOG file
-npro ignore .perltidyrc configuration command file
-pro=file read configuration commands from file instead of .perltidyrc
-st send output to standard output, STDOUT
-se send error output to standard error output, STDERR
-v display version number to standard output and quit
Basic Options:
-i=n use n columns per indentation level (default n=4)
-t tabs: use one tab character per indentation level, not recommeded
-nt no tabs: use n spaces per indentation level (default)
-et=n entab leading whitespace n spaces per tab; not recommended
-io "indent only": just do indentation, no other formatting.
-sil=n set starting indentation level to n; use if auto detection fails
-ole=s specify output line ending (s=dos or win, mac, unix)
-ple keep output line endings same as input (input must be filename)
Whitespace Control
-fws freeze whitespace; this disables all whitespace changes
and disables the following switches:
-bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
-bbt same as -bt but for code block braces; same as -bt if not given
-bbvt block braces vertically tight; use with -bl or -bli
-bbvtl=s make -bbvt to apply to selected list of block types
-pt=n paren tightness (n=0, 1 or 2)
-sbt=n square bracket tightness (n=0, 1, or 2)
-bvt=n brace vertical tightness,
n=(0=open, 1=close unless multiple steps on a line, 2=always close)
-pvt=n paren vertical tightness (see -bvt for n)
-sbvt=n square bracket vertical tightness (see -bvt for n)
-bvtc=n closing brace vertical tightness:
n=(0=open, 1=sometimes close, 2=always close)
-pvtc=n closing paren vertical tightness, see -bvtc for n.
-sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
-ci=n sets continuation indentation=n, default is n=2 spaces
-lp line up parentheses, brackets, and non-BLOCK braces
-sfs add space before semicolon in for( ; ; )
-aws allow perltidy to add whitespace (default)
-dws delete all old non-essential whitespace
-icb indent closing brace of a code block
-cti=n closing indentation of paren, square bracket, or non-block brace:
n=0 none, =1 align with opening, =2 one full indentation level
-icp equivalent to -cti=2
-wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
-wrs=s want space right of tokens in string;
-sts put space before terminal semicolon of a statement
-sak=s put space between keywords given in s and '(';
-nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
Line Break Control
-fnl freeze newlines; this disables all line break changes
and disables the following switches:
-anl add newlines; ok to introduce new line breaks
-bbs add blank line before subs and packages
-bbc add blank line before block comments
-bbb add blank line between major blocks
-sob swallow optional blank lines
-ce cuddled else; use this style: '} else {'
-dnl delete old newlines (default)
-mbl=n maximum consecutive blank lines (default=1)
-l=n maximum line length; default n=80
-bl opening brace on new line
-sbl opening sub brace on new line. value of -bl is used if not given.
-bli opening brace on new line and indented
-bar opening brace always on right, even for long clauses
-vt=n vertical tightness (requires -lp); n controls break after opening
token: 0=never 1=no break if next line balanced 2=no break
-vtc=n vertical tightness of closing container; n controls if closing
token starts new line: 0=always 1=not unless list 1=never
-wba=s want break after tokens in string; i.e. wba=': .'
-wbb=s want break before tokens in string
Following Old Breakpoints
-kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
-bot break at old conditional (ternary ?:) operator breakpoints (default)
-cab=n break at commas after a comma-arrow (=>):
n=0 break at all commas after =>
n=1 stable: break unless this breaks an existing one-line container
n=2 break only if a one-line container cannot be formed
n=3 do not treat commas after => specially at all
Comment controls
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
-fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
-cscl=s change closing side comment to apply to selected list of blocks
-csci=n minimum number of lines needed to apply a -csc tag, default n=6
-csct=n maximum number of columns of appended text, default n=20
-cscw causes warning if old side comment is overwritten with -csc
-sbc use 'static block comments' identified by leading '##' (default)
-sbcp=s change static block comment identifier to be other than '##'
-osbc outdent static block comments
-ssc use 'static side comments' identified by leading '##' (default)
-sscp=s change static side comment identifier to be other than '##'
Delete selected text
-dac delete all comments AND pod
-dbc delete block comments
-dsc delete side comments
-dp delete pod
Send selected text to a '.TEE' file
-tac tee all comments AND pod
-tbc tee block comments
-tsc tee side comments
-tp tee pod
Outdenting
-olq outdent long quoted strings (default)
-olc outdent a long block comment line
-ola outdent statement labels
-okw outdent control keywords (redo, next, last, goto, return)
-okwl=s specify alternative keywords for -okw command
Other controls
-mft=n maximum fields per table; default n=40
-x do not format lines before hash-bang line (i.e., for VMS)
-asc allows perltidy to add a ';' when missing (default)
-dsm allows perltidy to delete an unnecessary ';' (default)
Combinations of other parameters
-gnu attempt to follow GNU Coding Standards as applied to perl
-mangle remove as many newlines as possible (but keep comments and pods)
-extrude insert as many newlines as possible
Dump and die, debugging
-dop dump options used in this run to standard output and quit
-ddf dump default options to standard output and quit
-dsn dump all option short names to standard output and quit
-dln dump option long names to standard output and quit
-dpro dump whatever configuration file is in effect to standard output
-dtt dump all token types to standard output and quit
HTML
-html write an html file (see 'man perl2web' for many options)
Note: when -html is used, no indentation or formatting are done.
Hint: try perltidy -html -css=mystyle.css filename.pl
and edit mystyle.css to change the appearance of filename.html.
-nnn gives line numbers
-pre only writes out <pre>..</pre> code section
-toc places a table of contents to subs at the top (default)
-pod passes pod text through pod2html (default)
-frm write html as a frame (3 files)
-text=s extra extension for table of contents if -frm, default='toc'
-sext=s extra extension for file content if -frm, default='src'
A prefix of "n" negates short form toggle switches, and a prefix of "no"
negates the long forms. For example, -nasc means don't add missing
semicolons.
If you are unable to see this entire text, try "perltidy -h | more"
For more detailed information, and additional options, try "man perltidy",
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF
}
sub process_this_file {
my ( $truth, $beauty ) = @_;
# loop to process each line of this file
while ( my $line_of_tokens = $truth->get_line() ) {
$beauty->write_line($line_of_tokens);
}
# finish up
eval { $beauty->finish_formatting() };
$truth->report_tokenization_errors();
}
sub check_syntax {
# Use 'perl -c' to make sure that we did not create bad syntax
# This is a very good independent check for programming errors
#
# Given names of the input and output files, ($ifname, $ofname),
# we do the following:
# - check syntax of the input file
# - if bad, all done (could be an incomplete code snippet)
# - if infile syntax ok, then check syntax of the output file;
# - if outfile syntax bad, issue warning; this implies a code bug!
# - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
my $infile_syntax_ok = 0;
my $line_of_dashes = '-' x 42 . "\n";
my $flags = $rOpts->{'perl-syntax-check-flags'};
# be sure we invoke perl with -c
# note: perl will accept repeated flags like '-c -c'. It is safest
# to append another -c than try to find an interior bundled c, as
# in -Tc, because such a 'c' might be in a quoted string, for example.
if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
# be sure we invoke perl with -x if requested
# same comments about repeated parameters applies
if ( $rOpts->{'look-for-hash-bang'} ) {
if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
}
# this shouldn't happen unless a termporary file couldn't be made
if ( $ifname eq '-' ) {
$logger_object->write_logfile_entry(
"Cannot run perl -c on STDIN and STDOUT\n");
return $infile_syntax_ok;
}
$logger_object->write_logfile_entry(
"checking input file syntax with perl $flags\n");
$logger_object->write_logfile_entry($line_of_dashes);
# Not all operating systems/shells support redirection of the standard
# error output.
my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
$logger_object->write_logfile_entry("$perl_output\n");
if ( $perl_output =~ /syntax\s*OK/ ) {
$infile_syntax_ok = 1;
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry(
"checking output file syntax with perl $flags ...\n");
$logger_object->write_logfile_entry($line_of_dashes);
my $perl_output =
do_syntax_check( $ofname, $flags, $error_redirection );
$logger_object->write_logfile_entry("$perl_output\n");
unless ( $perl_output =~ /syntax\s*OK/ ) {
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ofname !\n"
);
$logger_object->warning(
"This implies an error in perltidy; the file $ofname is bad\n");
$logger_object->report_definite_bug();
# the perl version number will be helpful for diagnosing the problem
$logger_object->write_logfile_entry(
qx/perl -v $error_redirection/ . "\n" );
}
}
else {
# Only warn of perl -c syntax errors. Other messages,
# such as missing modules, are too common. They can be
# seen by running with perltidy -w
$logger_object->complain("A syntax check using perl $flags gives: \n");
$logger_object->complain($line_of_dashes);
$logger_object->complain("$perl_output\n");
$logger_object->complain($line_of_dashes);
$infile_syntax_ok = -1;
$logger_object->write_logfile_entry($line_of_dashes);
$logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
);
}
return $infile_syntax_ok;
}
sub do_syntax_check {
my ( $fname, $flags, $error_redirection ) = @_;
# We have to quote the filename in case it has unusual characters
# or spaces. Example: this filename #CM11.pm# gives trouble.
$fname = '"' . $fname . '"';
# Under VMS something like -T will become -t (and an error) so we
# will put quotes around the flags. Double quotes seem to work on
# Unix/Windows/VMS, but this may not work on all systems. (Single
# quotes do not work under Windows). It could become necessary to
# put double quotes around each flag, such as: -"c" -"T"
# We may eventually need some system-dependent coding here.
$flags = '"' . $flags . '"';
# now wish for luck...
return qx/perl $flags $fname $error_redirection/;
}
#####################################################################
#
# This is a stripped down version of IO::Scalar
# Given a reference to a scalar, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which reads lines (mode='w')
#
#####################################################################
package Perl::Tidy::IOScalar;
use Carp;
sub new {
my ( $package, $rscalar, $mode ) = @_;
my $ref = ref $rscalar;
if ( $ref ne 'SCALAR' ) {
confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM
}
if ( $mode eq 'w' ) {
$$rscalar = "";
return bless [ $rscalar, $mode ], $package;
}
elsif ( $mode eq 'r' ) {
# Convert a scalar to an array.
# This avoids looking for "\n" on each call to getline
my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
my $i_next = 0;
return bless [ \@array, $mode, $i_next ], $package;
}
else {
confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
}
sub getline {
my $self = shift;
my $mode = $self->[1];
if ( $mode ne 'r' ) {
confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
my $i = $self->[2]++;
##my $line = $self->[0]->[$i];
return $self->[0]->[$i];
}
sub print {
my $self = shift;
my $mode = $self->[1];
if ( $mode ne 'w' ) {
confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
${ $self->[0] } .= $_[0];
}
sub close { return }
#####################################################################
#
# This is a stripped down version of IO::ScalarArray
# Given a reference to an array, it supplies either:
# a getline method which reads lines (mode='r'), or
# a print method which reads lines (mode='w')
#
# NOTE: this routine assumes that that there aren't any embedded
package Perl::Tidy::IOScalarArray;
use Carp;
sub new {
my ( $package, $rarray, $mode ) = @_;
my $ref = ref $rarray;
if ( $ref ne 'ARRAY' ) {
confess <<EOM;
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM
}
if ( $mode eq 'w' ) {
@$rarray = ();
return bless [ $rarray, $mode ], $package;
}
elsif ( $mode eq 'r' ) {
my $i_next = 0;
return bless [ $rarray, $mode, $i_next ], $package;
}
else {
confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
}
sub getline {
my $self = shift;
my $mode = $self->[1];
if ( $mode ne 'r' ) {
confess <<EOM;
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
my $i = $self->[2]++;
return $self->[0]->[$i];
}
sub print {
my $self = shift;
my $mode = $self->[1];
if ( $mode ne 'w' ) {
confess <<EOM;
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
}
push @{ $self->[0] }, $_[0];
}
sub close { return }
package Perl::Tidy::LineSource;
sub new {
my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
my $input_file_copy = undef;
my $fh_copy;
my $input_line_ending;
if ( $rOpts->{'preserve-line-endings'} ) {
$input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
}
( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
return undef unless $fh;
if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
{
$rOpts->{'check-syntax'} = 0;
$input_file_copy = '-';
$$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM
}
return bless {
_fh => $fh,
_fh_copy => $fh_copy,
_filename => $input_file,
_input_file_copy => $input_file_copy,
_input_line_ending => $input_line_ending,
_rinput_buffer => [],
_started => 0,
}, $class;
}
sub get_input_file_copy_name {
my $self = shift;
my $ifname = $self->{_input_file_copy};
unless ($ifname) {
$ifname = $self->{_filename};
}
return $ifname;
}
sub close_input_file {
my $self = shift;
eval { $self->{_fh}->close() };
eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
}
sub get_line {
my $self = shift;
my $line = undef;
my $fh = $self->{_fh};
my $fh_copy = $self->{_fh_copy};
my $rinput_buffer = $self->{_rinput_buffer};
if ( scalar(@$rinput_buffer) ) {
$line = shift @$rinput_buffer;
}
else {
$line = $fh->getline();
if ( $line && !$self->{_started} ) {
if ( $line =~ /[\015][^\015\012]/ ) {
@$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
my $count = @$rinput_buffer;
$line = shift @$rinput_buffer;
}
$self->{_started}++;
}
}
if ( $line && $fh_copy ) { $fh_copy->print($line); }
return $line;
}
package Perl::Tidy::LineSink;
sub new {
my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
$rpending_logfile_message, $binmode )
= @_;
my $fh = undef;
my $fh_copy = undef;
my $fh_tee = undef;
my $output_file_copy = "";
my $output_file_open = 0;
if ( $rOpts->{'format'} eq 'tidy' ) {
( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
unless ($fh) { die "Cannot write to output stream\n"; }
$output_file_open = 1;
if ($binmode) {
if ( ref($fh) eq 'IO::File' ) {
binmode $fh;
}
if ( $output_file eq '-' ) { binmode STDOUT }
}
}
if ( $output_file eq '-' || ref $output_file ) {
if ( $rOpts->{'check-syntax'} ) {
$rOpts->{'check-syntax'} = 0;
$output_file_copy = '-';
$$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM
}
}
bless {
_fh => $fh,
_fh_copy => $fh_copy,
_fh_tee => $fh_tee,
_output_file => $output_file,
_output_file_open => $output_file_open,
_output_file_copy => $output_file_copy,
_tee_flag => 0,
_tee_file => $tee_file,
_tee_file_opened => 0,
_line_separator => $line_separator,
_binmode => $binmode,
}, $class;
}
sub write_line {
my $self = shift;
my $fh = $self->{_fh};
my $fh_copy = $self->{_fh_copy};
my $output_file_open = $self->{_output_file_open};
chomp $_[0];
$_[0] .= $self->{_line_separator};
$fh->print( $_[0] ) if ( $self->{_output_file_open} );
print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
if ( $self->{_tee_flag} ) {
unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
my $fh_tee = $self->{_fh_tee};
print $fh_tee $_[0];
}
}
sub get_output_file_copy {
my $self = shift;
my $ofname = $self->{_output_file_copy};
unless ($ofname) {
$ofname = $self->{_output_file};
}
return $ofname;
}
sub tee_on {
my $self = shift;
$self->{_tee_flag} = 1;
}
sub tee_off {
my $self = shift;
$self->{_tee_flag} = 0;
}
sub really_open_tee_file {
my $self = shift;
my $tee_file = $self->{_tee_file};
my $fh_tee;
$fh_tee = IO::File->new(">$tee_file")
or die("couldn't open TEE file $tee_file: $!\n");
binmode $fh_tee if $self->{_binmode};
$self->{_tee_file_opened} = 1;
$self->{_fh_tee} = $fh_tee;
}
sub close_output_file {
my $self = shift;
eval { $self->{_fh}->close() } if $self->{_output_file_open};
eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
$self->close_tee_file();
}
sub close_tee_file {
my $self = shift;
if ( $self->{_tee_file_opened} ) {
eval { $self->{_fh_tee}->close() };
$self->{_tee_file_opened} = 0;
}
}
package Perl::Tidy::Diagnostics;
sub new {
my $class = shift;
bless {
_write_diagnostics_count => 0,
_last_diagnostic_file => "",
_input_file => "",
_fh => undef,
}, $class;
}
sub set_input_file {
my $self = shift;
$self->{_input_file} = $_[0];
}
sub write_diagnostics {
my $self = shift;
unless ( $self->{_write_diagnostics_count} ) {
open DIAGNOSTICS, ">DIAGNOSTICS"
or death("couldn't open DIAGNOSTICS: $!\n");
}
my $last_diagnostic_file = $self->{_last_diagnostic_file};
my $input_file = $self->{_input_file};
if ( $last_diagnostic_file ne $input_file ) {
print DIAGNOSTICS "\nFILE:$input_file\n";
}
$self->{_last_diagnostic_file} = $input_file;
my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
print DIAGNOSTICS "$input_line_number:\t@_";
$self->{_write_diagnostics_count}++;
}
package Perl::Tidy::Logger;
sub new {
my $class = shift;
my $fh;
my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
unless ( ref($warning_file) ) {
if ( -e $warning_file ) { unlink($warning_file) }
}
bless {
_log_file => $log_file,
_fh_warnings => undef,
_rOpts => $rOpts,
_fh_warnings => undef,
_last_input_line_written => 0,
_at_end_of_file => 0,
_use_prefix => 1,
_block_log_output => 0,
_line_of_tokens => undef,
_output_line_number => undef,
_wrote_line_information_string => 0,
_wrote_column_headings => 0,
_warning_file => $warning_file,
_warning_count => 0,
_complaint_count => 0,
_saw_code_bug => -1, _saw_brace_error => 0,
_saw_extrude => $saw_extrude,
_output_array => [],
}, $class;
}
sub close_log_file {
my $self = shift;
if ( $self->{_fh_warnings} ) {
eval { $self->{_fh_warnings}->close() };
$self->{_fh_warnings} = undef;
}
}
sub get_warning_count {
my $self = shift;
return $self->{_warning_count};
}
sub get_use_prefix {
my $self = shift;
return $self->{_use_prefix};
}
sub block_log_output {
my $self = shift;
$self->{_block_log_output} = 1;
}
sub unblock_log_output {
my $self = shift;
$self->{_block_log_output} = 0;
}
sub interrupt_logfile {
my $self = shift;
$self->{_use_prefix} = 0;
$self->warning("\n");
$self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
}
sub resume_logfile {
my $self = shift;
$self->write_logfile_entry( '#' x 60 . "\n" );
$self->{_use_prefix} = 1;
}
sub we_are_at_the_last_line {
my $self = shift;
unless ( $self->{_wrote_line_information_string} ) {
$self->write_logfile_entry("Last line\n\n");
}
$self->{_at_end_of_file} = 1;
}
sub black_box {
my $self = shift;
my ( $line_of_tokens, $output_line_number ) = @_;
my $input_line = $line_of_tokens->{_line_text};
my $input_line_number = $line_of_tokens->{_line_number};
$self->{_line_of_tokens} = $line_of_tokens;
$self->{_output_line_number} = $output_line_number;
$self->{_wrote_line_information_string} = 0;
my $last_input_line_written = $self->{_last_input_line_written};
my $rOpts = $self->{_rOpts};
if (
(
( $input_line_number - $last_input_line_written ) >=
$rOpts->{'logfile-gap'}
)
|| ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
)
{
my $rlevels = $line_of_tokens->{_rlevels};
my $structural_indentation_level = $$rlevels[0];
$self->{_last_input_line_written} = $input_line_number;
( my $out_str = $input_line ) =~ s/^\s*//;
chomp $out_str;
$out_str = ( '.' x $structural_indentation_level ) . $out_str;
if ( length($out_str) > 35 ) {
$out_str = substr( $out_str, 0, 35 ) . " ....";
}
$self->logfile_output( "", "$out_str\n" );
}
}
sub write_logfile_entry {
my $self = shift;
$self->logfile_output( ">>>", "@_" );
}
sub write_column_headings {
my $self = shift;
$self->{_wrote_column_headings} = 1;
my $routput_array = $self->{_output_array};
push @{$routput_array}, <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
in:out indent c b nesting code + messages; (messages begin with >>>)
lines levels i k (code begins with one '.' per indent level)
------ ----- - - -------- -------------------------------------------
EOM
}
sub make_line_information_string {
my $self = shift;
my $line_of_tokens = $self->{_line_of_tokens};
my $input_line_number = $line_of_tokens->{_line_number};
my $line_information_string = "";
if ($input_line_number) {
my $output_line_number = $self->{_output_line_number};
my $brace_depth = $line_of_tokens->{_curly_brace_depth};
my $paren_depth = $line_of_tokens->{_paren_depth};
my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
my $python_indentation_level =
$line_of_tokens->{_python_indentation_level};
my $rlevels = $line_of_tokens->{_rlevels};
my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
my $rci_levels = $line_of_tokens->{_rci_levels};
my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
my $structural_indentation_level = $$rlevels[0];
$self->write_column_headings() unless $self->{_wrote_column_headings};
my $extra_space = "";
$extra_space .=
( $input_line_number < 10 ) ? " "
: ( $input_line_number < 100 ) ? " "
: "";
$extra_space .=
( $output_line_number < 10 ) ? " "
: ( $output_line_number < 100 ) ? " "
: "";
my $nesting_string =
"($paren_depth [$square_bracket_depth {$brace_depth";
my $nesting_string_new = $$rnesting_tokens[0];
my $ci_level = $$rci_levels[0];
if ( $ci_level > 9 ) { $ci_level = '*' }
my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
if ( length($nesting_string_new) <= 8 ) {
$nesting_string =
$nesting_string_new . " " x ( 8 - length($nesting_string_new) );
}
if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
$line_information_string =
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
}
return $line_information_string;
}
sub logfile_output {
my $self = shift;
my ( $prompt, $msg ) = @_;
return if ( $self->{_block_log_output} );
my $routput_array = $self->{_output_array};
if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
push @{$routput_array}, "$msg";
}
else {
my $line_information_string = $self->make_line_information_string();
$self->{_wrote_line_information_string} = 1;
if ($line_information_string) {
push @{$routput_array}, "$line_information_string $prompt$msg";
}
else {
push @{$routput_array}, "$msg";
}
}
}
sub get_saw_brace_error {
my $self = shift;
return $self->{_saw_brace_error};
}
sub increment_brace_error {
my $self = shift;
$self->{_saw_brace_error}++;
}
sub brace_warning {
my $self = shift;
use constant BRACE_WARNING_LIMIT => 10;
my $saw_brace_error = $self->{_saw_brace_error};
if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
$self->warning(@_);
}
$saw_brace_error++;
$self->{_saw_brace_error} = $saw_brace_error;
if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
$self->warning("No further warnings of this type will be given\n");
}
}
sub complain {
my $self = shift;
my $rOpts = $self->{_rOpts};
if ( $rOpts->{'warning-output'} ) {
$self->warning(@_);
}
else {
$self->{_complaint_count}++;
$self->write_logfile_entry(@_);
}
}
sub warning {
my $self = shift;
use constant WARNING_LIMIT => 50;
my $rOpts = $self->{_rOpts};
unless ( $rOpts->{'quiet'} ) {
my $warning_count = $self->{_warning_count};
unless ($warning_count) {
my $warning_file = $self->{_warning_file};
my $fh_warnings;
if ( $rOpts->{'standard-error-output'} ) {
$fh_warnings = *STDERR;
}
else {
( $fh_warnings, my $filename ) =
Perl::Tidy::streamhandle( $warning_file, 'w' );
$fh_warnings or die("couldn't open $filename $!\n");
warn "## Please see file $filename\n";
}
$self->{_fh_warnings} = $fh_warnings;
}
my $fh_warnings = $self->{_fh_warnings};
if ( $warning_count < WARNING_LIMIT ) {
if ( $self->get_use_prefix() > 0 ) {
my $input_line_number =
Perl::Tidy::Tokenizer::get_input_line_number();
$fh_warnings->print("$input_line_number:\t@_");
$self->write_logfile_entry("WARNING: @_");
}
else {
$fh_warnings->print(@_);
$self->write_logfile_entry(@_);
}
}
$warning_count++;
$self->{_warning_count} = $warning_count;
if ( $warning_count == WARNING_LIMIT ) {
$fh_warnings->print("No further warnings will be given\n");
}
}
}
sub report_possible_bug {
my $self = shift;
my $saw_code_bug = $self->{_saw_code_bug};
$self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
}
sub report_definite_bug {
my $self = shift;
$self->{_saw_code_bug} = 1;
}
sub ask_user_for_bug_report {
my $self = shift;
my ( $infile_syntax_ok, $formatter ) = @_;
my $saw_code_bug = $self->{_saw_code_bug};
if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
$self->warning(<<EOM);
You may have encountered a code bug in perltidy. If you think so, and
the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be
corrected. Include the smallest possible script which has the problem,
along with the .LOG file. See the manual pages for contact information.
Thank you!
EOM
}
elsif ( $saw_code_bug == 1 ) {
if ( $self->{_saw_extrude} ) {
$self->warning(<<EOM);
You may have encountered a bug in perltidy. However, since you are using the
-extrude option, the problem may be with perl or one of its modules, which have
occasional problems with this type of file. If you believe that the
problem is with perltidy, and the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be corrected.
Include the smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.
Thank you!
EOM
}
else {
$self->warning(<<EOM);
Oops, you seem to have encountered a bug in perltidy. Please check the
BUGS file at http://perltidy.sourceforge.net. If the problem is not
listed there, please report it so that it can be corrected. Include the
smallest possible script which produces this message, along with the
.LOG file if appropriate. See the manual pages for contact information.
Your efforts are appreciated.
Thank you!
EOM
my $added_semicolon_count = 0;
eval {
$added_semicolon_count =
$formatter->get_added_semicolon_count();
};
if ( $added_semicolon_count > 0 ) {
$self->warning(<<EOM);
The log file shows that perltidy added $added_semicolon_count semicolons.
Please rerun with -nasc to see if that is the cause of the syntax error. Even
if that is the problem, please report it so that it can be fixed.
EOM
}
}
}
}
sub finish {
my $self = shift;
my ( $infile_syntax_ok, $formatter ) = @_;
my $rOpts = $self->{_rOpts};
my $warning_count = $self->{_warning_count};
my $saw_code_bug = $self->{_saw_code_bug};
my $save_logfile =
( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
|| $saw_code_bug == 1
|| $rOpts->{'logfile'};
my $log_file = $self->{_log_file};
if ($warning_count) {
if ($save_logfile) {
$self->block_log_output(); $self->warning(
"The logfile $log_file may contain useful information\n");
$self->unblock_log_output();
}
if ( $self->{_complaint_count} > 0 ) {
$self->warning(
"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
);
}
if ( $self->{_saw_brace_error}
&& ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
{
$self->warning("To save a full .LOG file rerun with -g\n");
}
}
$self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
if ($save_logfile) {
my $log_file = $self->{_log_file};
my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
if ($fh) {
my $routput_array = $self->{_output_array};
foreach ( @{$routput_array} ) { $fh->print($_) }
eval { $fh->close() };
}
}
}
package Perl::Tidy::DevNull;
sub new { return bless {}, $_[0] }
sub print { return }
sub close { return }
package Perl::Tidy::HtmlWriter;
use File::Basename;
use vars qw{
%html_color
%html_bold
%html_italic
%token_short_names
%short_to_long_names
$rOpts
$css_filename
$css_linkname
$missing_html_entities
};
{ eval "use HTML::Entities"; $missing_html_entities = $@; }
sub new {
my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
$html_src_extension )
= @_;
my $html_file_opened = 0;
my $html_fh;
( $html_fh, my $html_filename ) =
Perl::Tidy::streamhandle( $html_file, 'w' );
unless ($html_fh) {
warn("can't open $html_file: $!\n");
return undef;
}
$html_file_opened = 1;
if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
$input_file = "NONAME";
}
my $toc_string;
my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
my $html_pre_fh;
my @pre_string_stack;
if ( $rOpts->{'html-pre-only'} ) {
$html_pre_fh = $html_fh;
$html_pre_fh->print( <<"PRE_END");
<pre>
PRE_END
}
else {
my $pre_string;
$html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
push @pre_string_stack, \$pre_string;
}
my $html_pod_fh;
my $pod_string;
if ( $rOpts->{'pod2html'} ) {
if ( $rOpts->{'html-pre-only'} ) {
undef $rOpts->{'pod2html'};
}
else {
eval "use Pod::Html";
if ($@) {
warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
undef $rOpts->{'pod2html'};
}
else {
$html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
}
}
}
my $toc_filename;
my $src_filename;
if ( $rOpts->{'frames'} ) {
unless ($extension) {
warn
"cannot use frames without a specified output extension; ignoring -frm\n";
undef $rOpts->{'frames'};
}
else {
$toc_filename = $input_file . $html_toc_extension . $extension;
$src_filename = $input_file . $html_src_extension . $extension;
}
}
my $title = $rOpts->{'title'};
unless ($title) {
( $title, my $path ) = fileparse($input_file);
}
my $toc_item_count = 0;
my $in_toc_package = "";
my $last_level = 0;
bless {
_input_file => $input_file, _title => $title, _html_file => $html_file, _toc_filename => $toc_filename, _src_filename => $src_filename, _html_file_opened => $html_file_opened, _html_fh => $html_fh, _html_pre_fh => $html_pre_fh, _rpre_string_stack => \@pre_string_stack, _html_pod_fh => $html_pod_fh, _rpod_string => \$pod_string, _pod_cut_count => 0, _html_toc_fh => $html_toc_fh, _rtoc_string => \$toc_string, _rtoc_item_count => \$toc_item_count, _rin_toc_package => \$in_toc_package, _rtoc_name_count => {}, _rpackage_stack => [], _rlast_level => \$last_level, }, $class;
}
sub add_toc_item {
my $self = shift;
my ( $name, $type ) = @_;
my $html_toc_fh = $self->{_html_toc_fh};
my $html_pre_fh = $self->{_html_pre_fh};
my $rtoc_name_count = $self->{_rtoc_name_count};
my $rtoc_item_count = $self->{_rtoc_item_count};
my $rlast_level = $self->{_rlast_level};
my $rin_toc_package = $self->{_rin_toc_package};
my $rpackage_stack = $self->{_rpackage_stack};
my $end_package_list = sub {
if ($$rin_toc_package) {
$html_toc_fh->print("</ul>\n</li>\n");
$$rin_toc_package = "";
}
};
my $start_package_list = sub {
my ( $unique_name, $package ) = @_;
if ($$rin_toc_package) { $end_package_list->() }
$html_toc_fh->print(<<EOM);
<li><a href=\"<ul>
EOM
$$rin_toc_package = $package;
};
unless ($$rtoc_item_count) {
return if ( $type eq 'EOF' );
$html_toc_fh->print( <<"TOC_END");
<!-- BEGIN CODE INDEX --><a name="code-index"></a>
<ul>
TOC_END
}
$$rtoc_item_count++;
my $unique_name = $name;
if ( $type eq 'package' ) { $unique_name = "package-$name" }
if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
$unique_name .= "-$count";
}
if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
if ( $type eq 'sub' ) {
my $package = $rpackage_stack->[$$rlast_level];
unless ($package) { $package = 'main' }
if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
$end_package_list->();
}
unless ($$rin_toc_package) {
$start_package_list->( $unique_name, $package );
}
}
if ( $type eq 'package' ) {
$start_package_list->( $unique_name, $name );
}
elsif ( $type eq 'sub' ) {
$html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
}
else {
$end_package_list->();
$html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
}
$html_pre_fh->print("<a name=\"$unique_name\"></a>");
if ( $type eq 'EOF' ) {
$html_toc_fh->print( <<"TOC_END");
</ul>
<!-- END CODE INDEX -->
TOC_END
}
}
BEGIN {
%short_to_long_names = (
'n' => 'numeric',
'p' => 'paren',
'q' => 'quote',
's' => 'structure',
'c' => 'comment',
'v' => 'v-string',
'cm' => 'comma',
'w' => 'bareword',
'co' => 'colon',
'pu' => 'punctuation',
'i' => 'identifier',
'j' => 'label',
'h' => 'here-doc-target',
'hh' => 'here-doc-text',
'k' => 'keyword',
'sc' => 'semicolon',
'm' => 'subroutine',
'pd' => 'pod-text',
);
%token_short_names = (
'#' => 'c',
'n' => 'n',
'v' => 'v',
'k' => 'k',
'F' => 'k',
'Q' => 'q',
'q' => 'q',
'J' => 'j',
'j' => 'j',
'h' => 'h',
'H' => 'hh',
'w' => 'w',
',' => 'cm',
'=>' => 'cm',
';' => 'sc',
':' => 'co',
'f' => 'sc',
'(' => 'p',
')' => 'p',
'M' => 'm',
'P' => 'pd',
'A' => 'co',
);
my @identifier = qw" i t U C Y Z G :: ";
@token_short_names{@identifier} = ('i') x scalar(@identifier);
my @structure = qw" { } ";
@token_short_names{@structure} = ('s') x scalar(@structure);
}
sub make_getopt_long_names {
my $class = shift;
my ($rgetopt_names) = @_;
while ( my ( $short_name, $name ) = each %short_to_long_names ) {
push @$rgetopt_names, "html-color-$name=s";
push @$rgetopt_names, "html-italic-$name!";
push @$rgetopt_names, "html-bold-$name!";
}
push @$rgetopt_names, "html-color-background=s";
push @$rgetopt_names, "html-linked-style-sheet=s";
push @$rgetopt_names, "nohtml-style-sheets";
push @$rgetopt_names, "html-pre-only";
push @$rgetopt_names, "html-line-numbers";
push @$rgetopt_names, "html-entities!";
push @$rgetopt_names, "stylesheet";
push @$rgetopt_names, "html-table-of-contents!";
push @$rgetopt_names, "pod2html!";
push @$rgetopt_names, "frames!";
push @$rgetopt_names, "html-toc-extension=s";
push @$rgetopt_names, "html-src-extension=s";
push @$rgetopt_names, "backlink=s";
push @$rgetopt_names, "cachedir=s";
push @$rgetopt_names, "htmlroot=s";
push @$rgetopt_names, "libpods=s";
push @$rgetopt_names, "podpath=s";
push @$rgetopt_names, "podroot=s";
push @$rgetopt_names, "title=s";
push @$rgetopt_names, "podquiet!";
push @$rgetopt_names, "podverbose!";
push @$rgetopt_names, "podrecurse!";
push @$rgetopt_names, "podflush";
push @$rgetopt_names, "podheader!";
push @$rgetopt_names, "podindex!";
}
sub make_abbreviated_names {
my $class = shift;
my ($rexpansion) = @_;
while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
}
${$rexpansion}{"hcbg"} = ["html-color-background"];
${$rexpansion}{"pre"} = ["html-pre-only"];
${$rexpansion}{"toc"} = ["html-table-of-contents"];
${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
${$rexpansion}{"nnn"} = ["html-line-numbers"];
${$rexpansion}{"hent"} = ["html-entities"];
${$rexpansion}{"nhent"} = ["nohtml-entities"];
${$rexpansion}{"css"} = ["html-linked-style-sheet"];
${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
${$rexpansion}{"ss"} = ["stylesheet"];
${$rexpansion}{"pod"} = ["pod2html"];
${$rexpansion}{"npod"} = ["nopod2html"];
${$rexpansion}{"frm"} = ["frames"];
${$rexpansion}{"nfrm"} = ["noframes"];
${$rexpansion}{"text"} = ["html-toc-extension"];
${$rexpansion}{"sext"} = ["html-src-extension"];
}
sub check_options {
my $class = shift;
$rOpts = shift;
use constant ForestGreen => "#228B22";
use constant SaddleBrown => "#8B4513";
use constant magenta4 => "#8B008B";
use constant IndianRed3 => "#CD5555";
use constant DeepSkyBlue4 => "#00688B";
use constant MediumOrchid3 => "#B452CD";
use constant black => "#000000";
use constant white => "#FFFFFF";
use constant red => "#FF0000";
set_default_properties( 'c', ForestGreen, 0, 0 );
set_default_properties( 'pd', ForestGreen, 0, 1 );
set_default_properties( 'k', magenta4, 1, 0 ); set_default_properties( 'q', IndianRed3, 0, 0 );
set_default_properties( 'hh', IndianRed3, 0, 1 );
set_default_properties( 'h', IndianRed3, 1, 0 );
set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
set_default_properties( 'w', black, 0, 0 );
set_default_properties( 'n', MediumOrchid3, 0, 0 );
set_default_properties( 'v', MediumOrchid3, 0, 0 );
set_default_properties( 'j', IndianRed3, 1, 0 );
set_default_properties( 'm', red, 1, 0 );
set_default_color( 'html-color-background', white );
set_default_color( 'html-color-punctuation', black );
while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
$html_color{$short_name} = $rOpts->{"html-color-$long_name"};
$html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
$html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
}
if ( defined( $rOpts->{'stylesheet'} ) ) {
write_style_sheet_file('-');
exit 1;
}
if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
$css_linkname = $rOpts->{'html-linked-style-sheet'};
if ( $css_linkname =~ /^-/ ) {
die "You must specify a valid filename after -css\n";
}
}
if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
$rOpts->{'nohtml-style-sheets'} = 0;
warning("You can't specify both -css and -nss; -nss ignored\n");
}
if ($css_linkname) {
my $css_filename = $css_linkname;
unless ( -e $css_filename ) {
write_style_sheet_file($css_filename);
}
}
$missing_html_entities = 1 unless $rOpts->{'html-entities'};
}
sub write_style_sheet_file {
my $css_filename = shift;
my $fh;
unless ( $fh = IO::File->new("> $css_filename") ) {
die "can't open $css_filename: $!\n";
}
write_style_sheet_data($fh);
eval { $fh->close };
}
sub write_style_sheet_data {
my $fh = shift;
my $bg_color = $rOpts->{'html-color-background'};
my $text_color = $rOpts->{'html-color-punctuation'};
my $pre_bg_color = $rOpts->{'html-pre-color-background'};
$pre_bg_color = $bg_color unless $pre_bg_color;
$fh->print(<<"EOM");
/* default style sheet generated by perltidy */
body {background: $bg_color; color: $text_color}
pre { color: $text_color;
background: $pre_bg_color;
font-family: courier;
}
EOM
foreach my $short_name ( sort keys %short_to_long_names ) {
my $long_name = $short_to_long_names{$short_name};
my $abbrev = '.' . $short_name;
if ( length($short_name) == 1 ) { $abbrev .= ' ' } my $color = $html_color{$short_name};
if ( !defined($color) ) { $color = $text_color }
$fh->print("$abbrev \{ color: $color;");
if ( $html_bold{$short_name} ) {
$fh->print(" font-weight:bold;");
}
if ( $html_italic{$short_name} ) {
$fh->print(" font-style:italic;");
}
$fh->print("} /* $long_name */\n");
}
}
sub set_default_color {
my ( $key, $color ) = @_;
if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
$rOpts->{$key} = check_RGB($color);
}
sub check_RGB {
my ($color) = @_;
if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
return $color;
}
sub set_default_properties {
my ( $short_name, $color, $bold, $italic ) = @_;
set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
my $key;
$key = "html-bold-$short_to_long_names{$short_name}";
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
$key = "html-italic-$short_to_long_names{$short_name}";
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
}
sub pod_to_html {
my $self = shift;
my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
my $input_file = $self->{_input_file};
my $title = $self->{_title};
my $success_flag = 0;
unless ($pod_string) {
return $success_flag;
}
my $tmpfile;
if ( $rOpts->{'frames'} ) {
$tmpfile = $self->{_toc_filename};
}
else {
$tmpfile = Perl::Tidy::make_temporary_filename();
}
my $fh_tmp = IO::File->new( $tmpfile, 'w' );
unless ($fh_tmp) {
warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
return $success_flag;
}
$fh_tmp->print($pod_string);
$fh_tmp->close();
{
my @args;
push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
my $kw;
foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
}
foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
my $kwd = $kw; if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
elsif ( defined( $rOpts->{$kw} ) ) {
$kwd =~ s/^pod//;
push @args, "--no$kwd";
}
}
$kw = 'podflush';
if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
local $SIG{__DIE__} = sub {
print $_[0];
unlink $tmpfile if -e $tmpfile;
exit 1;
};
pod2html(@args);
}
$fh_tmp = IO::File->new( $tmpfile, 'r' );
unless ($fh_tmp) {
warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
goto RETURN;
}
my $html_fh = $self->{_html_fh};
my @toc;
my $in_toc;
my $no_print;
my $html_print = sub {
foreach (@_) {
$html_fh->print($_) unless ($no_print);
if ($in_toc) { push @toc, $_ }
}
};
my ( $saw_body, $saw_index, $saw_body_end );
while ( my $line = $fh_tmp->getline() ) {
if ( $line =~ /^\s*<html>\s*$/i ) {
my $date = localtime;
$html_print->("<!-- Generated by perltidy on $date -->\n");
$html_print->($line);
}
elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
$saw_body = 1;
$html_print->($css_string) if $css_string;
$html_print->($line);
$html_print->("<a name=\"-top-\"></a>\n");
$title = escape_html($title);
$html_print->("<h1>$title</h1>\n");
}
elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
$in_toc = 1;
$no_print = $rOpts->{'frames'}
|| !$rOpts->{'html-table-of-contents'};
$html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
$html_print->($line);
}
elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
$saw_index = 1;
$html_print->($line);
if ($toc_string) {
$html_print->("<hr />\n") if $rOpts->{'frames'};
$html_print->("<h2>Code Index:</h2>\n");
my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
$html_print->(@toc);
}
$in_toc = 0;
$no_print = 0;
}
elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
$line = $2;
$html_print->($1) if $1;
if ( $self->{_pod_cut_count} > 1 ) {
my $rpre_string = shift(@$rpre_string_stack);
if ($$rpre_string) {
$html_print->('<pre>');
$html_print->($$rpre_string);
$html_print->('</pre>');
}
else {
warn
"Problem merging html stream with pod2html; order may be wrong\n";
}
$html_print->($line);
}
else {
$html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
}
}
elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
$saw_body_end = 1;
if (@$rpre_string_stack) {
unless ( $self->{_pod_cut_count} > 1 ) {
$html_print->('<hr />');
}
while ( my $rpre_string = shift(@$rpre_string_stack) ) {
$html_print->('<pre>');
$html_print->($$rpre_string);
$html_print->('</pre>');
}
}
$html_print->($line);
}
else {
$html_print->($line);
}
}
$success_flag = 1;
unless ($saw_body) {
warn "Did not see <body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_body_end) {
warn "Did not see </body> in pod2html output\n";
$success_flag = 0;
}
unless ($saw_index) {
warn "Did not find INDEX END in pod2html output\n";
$success_flag = 0;
}
RETURN:
eval { $html_fh->close() };
unlink $tmpfile if -e $tmpfile;
if ( $success_flag && $rOpts->{'frames'} ) {
$self->make_frame( \@toc );
}
return $success_flag;
}
sub make_frame {
my $self = shift;
my ($rtoc) = @_;
my $input_file = $self->{_input_file};
my $html_filename = $self->{_html_file};
my $toc_filename = $self->{_toc_filename};
my $src_filename = $self->{_src_filename};
my $title = $self->{_title};
$title = escape_html($title);
my $top_basename = "";
my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
my ( $src_basename, $src_path ) = fileparse($src_filename);
my $src_frame_name = 'SRC';
my $first_anchor =
write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
$src_frame_name );
rename( $html_filename, $src_filename )
or die "Cannot rename $html_filename to $src_filename:$!\n";
write_frame_html(
$title, $html_filename, $top_basename,
$toc_basename, $src_basename, $src_frame_name
);
}
sub write_toc_html {
my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
my $fh = IO::File->new( $toc_filename, 'w' )
or die "Cannot open $toc_filename:$!\n";
$fh->print(<<EOM);
<html>
<head>
<title>$title</title>
</head>
<body>
<h1><a href=\"$src_basenameEOM
my $first_anchor =
change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
$fh->print( join "", @$rtoc );
$fh->print(<<EOM);
</body>
</html>
EOM
}
sub write_frame_html {
my (
$title, $frame_filename, $top_basename,
$toc_basename, $src_basename, $src_frame_name
) = @_;
my $fh = IO::File->new( $frame_filename, 'w' )
or die "Cannot open $toc_basename:$!\n";
$fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM
if ($top_basename) {
$fh->print(<<EOM);
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
}
else {
$fh->print(<<EOM);
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
}
$fh->print(<<EOM);
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM
}
sub change_anchor_names {
my ( $rlines, $filename, $target ) = @_;
my $first_anchor;
foreach my $line (@$rlines) {
if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
my $pre = $1;
my $name = $4;
my $post = $5;
my $href = "$filename $line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
unless ($first_anchor) { $first_anchor = $href }
}
}
return $first_anchor;
}
sub close_html_file {
my $self = shift;
return unless $self->{_html_file_opened};
my $html_fh = $self->{_html_fh};
my $rtoc_string = $self->{_rtoc_string};
if ( $rOpts->{'html-pre-only'} ) {
$html_fh->print( <<"PRE_END");
</pre>
PRE_END
eval { $html_fh->close() };
return;
}
$self->add_toc_item( 'EOF', 'EOF' );
my $rpre_string_stack = $self->{_rpre_string_stack};
if ( $rOpts->{pod2html}
&& $self->{_pod_cut_count} >= 1
&& $rOpts->{'html-color-background'} eq '#FFFFFF' )
{
$rOpts->{'html-pre-color-background'} = '#F0F0F0';
}
my $css_string;
my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
if ( $rOpts->{'html-linked-style-sheet'} ) {
$fh_css->print(
qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
);
}
elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
$fh_css->print( <<'ENDCSS');
<style type="text/css">
<!--
ENDCSS
write_style_sheet_data($fh_css);
$fh_css->print( <<"ENDCSS");
-->
</style>
ENDCSS
}
if ( $rOpts->{'pod2html'} ) {
my $rpod_string = $self->{_rpod_string};
$self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
$rpre_string_stack )
&& return;
}
my $input_file = $self->{_input_file};
my $title = escape_html($input_file);
my $date = localtime;
$html_fh->print( <<"HTML_START");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- Generated by perltidy on $date -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
HTML_START
if ($css_string) {
$html_fh->print($css_string);
$html_fh->print( <<"ENDCSS");
</head>
<body>
ENDCSS
}
else {
$html_fh->print( <<"HTML_START");
</head>
<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
HTML_START
}
$html_fh->print("<a name=\"-top-\"></a>\n");
$html_fh->print( <<"EOM");
<h1>$title</h1>
EOM
if ( $$rtoc_string
&& !$rOpts->{'frames'}
&& $rOpts->{'html-table-of-contents'} )
{
$html_fh->print($$rtoc_string);
}
my $fname_comment = $input_file;
$fname_comment =~ s/--+/-/g; $html_fh->print( <<"END_PRE");
<hr />
<!-- contents of filename: $fname_comment -->
<pre>
END_PRE
foreach my $rpre_string (@$rpre_string_stack) {
$html_fh->print($$rpre_string);
}
$html_fh->print( <<"HTML_END");
</pre>
</body>
</html>
HTML_END
eval { $html_fh->close() };
if ( $rOpts->{'frames'} ) {
my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
$self->make_frame( \@toc );
}
}
sub markup_tokens {
my $self = shift;
my ( $rtokens, $rtoken_type, $rlevels ) = @_;
my ( @colored_tokens, $j, $string, $type, $token, $level );
my $rlast_level = $self->{_rlast_level};
my $rpackage_stack = $self->{_rpackage_stack};
for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
$type = $$rtoken_type[$j];
$token = $$rtokens[$j];
$level = $$rlevels[$j];
$level = 0 if ( $level < 0 );
if ( $level > $$rlast_level ) {
unless ( $rpackage_stack->[ $level - 1 ] ) {
$rpackage_stack->[ $level - 1 ] = 'main';
}
$rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
}
elsif ( $level < $$rlast_level ) {
my $package = $rpackage_stack->[$level];
unless ($package) { $package = 'main' }
if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
$self->add_toc_item( $package, 'package' );
}
}
$$rlast_level = $level;
if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
$token = $self->markup_html_element( $1, 'k' );
push @colored_tokens, $token;
$token = $2;
$type = 'M';
my $signature = join "", @$rtoken_type;
unless ( $signature =~ /^i;/ ) {
my $subname = $token;
$subname =~ s/[\s\(].*$//; # remove any attributes and prototype
$self->add_toc_item( $subname, 'sub' );
}
}
if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
$token = $self->markup_html_element( $1, 'k' );
push @colored_tokens, $token;
$token = $2;
$type = 'i';
$self->add_toc_item( "$token", 'package' );
$rpackage_stack->[$level] = $token;
}
$token = $self->markup_html_element( $token, $type );
push @colored_tokens, $token;
}
return ( \@colored_tokens );
}
sub markup_html_element {
my $self = shift;
my ( $token, $type ) = @_;
return $token if ( $type eq 'b' ); return $token if ( $token =~ /^\s*$/ ); $token = escape_html($token);
my $short_name = $token_short_names{$type};
if ( !defined($short_name) ) {
$short_name = "pu"; }
if ( !$rOpts->{'nohtml-style-sheets'} ) {
if ( $short_name ne 'pu' ) {
$token = qq(<span class="$short_name">) . $token . "</span>";
}
}
else {
my $color = $html_color{$short_name};
if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
$token = qq(<font color="$color">) . $token . "</font>";
}
if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
}
return $token;
}
sub escape_html {
my $token = shift;
if ($missing_html_entities) {
$token =~ s/\&/&/g;
$token =~ s/\</</g;
$token =~ s/\>/>/g;
$token =~ s/\"/"/g;
}
else {
HTML::Entities::encode_entities($token);
}
return $token;
}
sub finish_formatting {
my $self = shift;
$self->close_html_file();
return;
}
sub write_line {
my $self = shift;
return unless $self->{_html_file_opened};
my $html_pre_fh = $self->{_html_pre_fh};
my ($line_of_tokens) = @_;
my $line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
my $line_number = $line_of_tokens->{_line_number};
chomp $input_line;
my $html_line;
if ( $line_type eq 'CODE' ) {
my $rtoken_type = $line_of_tokens->{_rtoken_type};
my $rtokens = $line_of_tokens->{_rtokens};
my $rlevels = $line_of_tokens->{_rlevels};
if ( $input_line =~ /(^\s*)/ ) {
$html_line = $1;
}
else {
$html_line = "";
}
my ($rcolored_tokens) =
$self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
$html_line .= join '', @$rcolored_tokens;
}
else {
my $line_character;
if ( $line_type eq 'HERE' ) { $line_character = 'H' }
elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
elsif ( $line_type eq 'END_START' ) {
$line_character = 'k';
$self->add_toc_item( '__END__', '__END__' );
}
elsif ( $line_type eq 'DATA_START' ) {
$line_character = 'k';
$self->add_toc_item( '__DATA__', '__DATA__' );
}
elsif ( $line_type =~ /^POD/ ) {
$line_character = 'P';
if ( $rOpts->{'pod2html'} ) {
my $html_pod_fh = $self->{_html_pod_fh};
if ( $line_type eq 'POD_START' ) {
my $rpre_string_stack = $self->{_rpre_string_stack};
my $rpre_string = $rpre_string_stack->[-1];
if ( $$rpre_string =~ /\S/ ) {
my $pre_string;
$html_pre_fh =
Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
$self->{_html_pre_fh} = $html_pre_fh;
push @$rpre_string_stack, \$pre_string;
my $for_html = '=for html'; $html_pod_fh->print(<<EOM);
$for_html
<!-- pERLTIDY sECTION -->
EOM
}
else {
$$rpre_string = "";
$html_pod_fh->print("\n");
}
}
$html_pod_fh->print( $input_line . "\n" );
if ( $line_type eq 'POD_END' ) {
$self->{_pod_cut_count}++;
$html_pod_fh->print("\n");
}
return;
}
}
else { $line_character = 'Q' }
$html_line = $self->markup_html_element( $input_line, $line_character );
}
if ( $rOpts->{'html-line-numbers'} ) {
my $extra_space .=
( $line_number < 10 ) ? " "
: ( $line_number < 100 ) ? " "
: ( $line_number < 1000 ) ? " "
: "";
$html_line = $extra_space . $line_number . " " . $html_line;
}
$html_pre_fh->print("$html_line\n");
}
package Perl::Tidy::Formatter;
BEGIN {
use constant FORMATTER_DEBUG_FLAG_BOND => 0;
use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
use constant FORMATTER_DEBUG_FLAG_CI => 0;
use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
use constant FORMATTER_DEBUG_FLAG_LIST => 0;
use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
use constant FORMATTER_DEBUG_FLAG_STORE => 0;
use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
my $debug_warning = sub {
print "FORMATTER_DEBUGGING with key $_[0]\n";
};
FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
}
use Carp;
use vars qw{
@gnu_stack
$max_gnu_stack_index
$gnu_position_predictor
$line_start_index_to_go
$last_indentation_written
$last_unadjusted_indentation
$last_leading_token
$saw_VERSION_in_this_file
$saw_END_or_DATA_
@gnu_item_list
$max_gnu_item_index
$gnu_sequence_number
$last_output_indentation
%last_gnu_equals
%gnu_comma_count
%gnu_arrow_count
@block_type_to_go
@type_sequence_to_go
@container_environment_to_go
@bond_strength_to_go
@forced_breakpoint_to_go
@lengths_to_go
@levels_to_go
@leading_spaces_to_go
@reduced_spaces_to_go
@matching_token_to_go
@mate_index_to_go
@nesting_blocks_to_go
@ci_levels_to_go
@nesting_depth_to_go
@nobreak_to_go
@old_breakpoint_to_go
@tokens_to_go
@types_to_go
%saved_opening_indentation
$max_index_to_go
$comma_count_in_batch
$old_line_count_in_batch
$last_nonblank_index_to_go
$last_nonblank_type_to_go
$last_nonblank_token_to_go
$last_last_nonblank_index_to_go
$last_last_nonblank_type_to_go
$last_last_nonblank_token_to_go
@nonblank_lines_at_depth
$starting_in_quote
$ending_in_quote
$in_format_skipping_section
$format_skipping_pattern_begin
$format_skipping_pattern_end
$forced_breakpoint_count
$forced_breakpoint_undo_count
@forced_breakpoint_undo_stack
%postponed_breakpoint
$tabbing
$embedded_tab_count
$first_embedded_tab_at
$last_embedded_tab_at
$deleted_semicolon_count
$first_deleted_semicolon_at
$last_deleted_semicolon_at
$added_semicolon_count
$first_added_semicolon_at
$last_added_semicolon_at
$first_tabbing_disagreement
$last_tabbing_disagreement
$in_tabbing_disagreement
$tabbing_disagreement_count
$input_line_tabbing
$last_line_type
$last_line_leading_type
$last_line_leading_level
$last_last_line_leading_level
%block_leading_text
%block_opening_line_number
$csc_new_statement_ok
$accumulating_text_for_block
$leading_block_text
$rleading_block_if_elsif_text
$leading_block_text_level
$leading_block_text_length_exceeded
$leading_block_text_line_length
$leading_block_text_line_number
$closing_side_comment_prefix_pattern
$closing_side_comment_list_pattern
$last_nonblank_token
$last_nonblank_type
$last_last_nonblank_token
$last_last_nonblank_type
$last_nonblank_block_type
$last_output_level
%is_do_follower
%is_if_brace_follower
%space_after_keyword
$rbrace_follower
$looking_for_else
%is_last_next_redo_return
%is_other_brace_follower
%is_else_brace_follower
%is_anon_sub_brace_follower
%is_anon_sub_1_brace_follower
%is_sort_map_grep
%is_sort_map_grep_eval
%is_sort_map_grep_eval_do
%is_block_without_semicolon
%is_if_unless
%is_and_or
%is_assignment
%is_chain_operator
%is_if_unless_and_or_last_next_redo_return
%is_until_while_for_if_elsif_else
@has_broken_sublist
@dont_align
@want_comma_break
$is_static_block_comment
$index_start_one_line_block
$semicolons_before_block_self_destruct
$index_max_forced_break
$input_line_number
$diagnostics_object
$vertical_aligner_object
$logger_object
$file_writer_object
$formatter_self
@ci_stack
$last_line_had_side_comment
%want_break_before
%outdent_keyword
$static_block_comment_pattern
$static_side_comment_pattern
%opening_vertical_tightness
%closing_vertical_tightness
%closing_token_indentation
%opening_token_right
%stack_opening_token
%stack_closing_token
$block_brace_vertical_tightness_pattern
$rOpts_add_newlines
$rOpts_add_whitespace
$rOpts_block_brace_tightness
$rOpts_block_brace_vertical_tightness
$rOpts_brace_left_and_indent
$rOpts_comma_arrow_breakpoints
$rOpts_break_at_old_keyword_breakpoints
$rOpts_break_at_old_comma_breakpoints
$rOpts_break_at_old_logical_breakpoints
$rOpts_break_at_old_ternary_breakpoints
$rOpts_closing_side_comment_else_flag
$rOpts_closing_side_comment_maximum_text
$rOpts_continuation_indentation
$rOpts_cuddled_else
$rOpts_delete_old_whitespace
$rOpts_fuzzy_line_length
$rOpts_indent_columns
$rOpts_line_up_parentheses
$rOpts_maximum_fields_per_table
$rOpts_maximum_line_length
$rOpts_short_concatenation_item_length
$rOpts_swallow_optional_blank_lines
$rOpts_ignore_old_breakpoints
$rOpts_format_skipping
$rOpts_space_function_paren
$rOpts_space_keyword_paren
$rOpts_keep_interior_semicolons
$half_maximum_line_length
%is_opening_type
%is_closing_type
%is_keyword_returning_list
%tightness
%matching_token
$rOpts
%right_bond_strength
%left_bond_strength
%binary_ws_rules
%want_left_space
%want_right_space
%is_digraph
%is_trigraph
$bli_pattern
$bli_list_string
%is_closing_type
%is_opening_type
%is_closing_token
%is_opening_token
};
BEGIN {
$bli_list_string = 'if else elsif unless while for foreach do : sub';
@_ = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x=
);
@is_digraph{@_} = (1) x scalar(@_);
@_ = qw( ... **= <<= >>= &&= ||= //= <=> );
@is_trigraph{@_} = (1) x scalar(@_);
@_ = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@is_assignment{@_} = (1) x scalar(@_);
@_ = qw(
grep
keys
map
reverse
sort
split
);
@is_keyword_returning_list{@_} = (1) x scalar(@_);
@_ = qw(is if unless and or err last next redo return);
@is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
@_ = qw(until while for if elsif else);
@is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
@_ = qw(last next redo return);
@is_last_next_redo_return{@_} = (1) x scalar(@_);
@_ = qw(sort map grep);
@is_sort_map_grep{@_} = (1) x scalar(@_);
@_ = qw(sort map grep eval);
@is_sort_map_grep_eval{@_} = (1) x scalar(@_);
@_ = qw(sort map grep eval do);
@is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
@_ = qw(if unless);
@is_if_unless{@_} = (1) x scalar(@_);
@_ = qw(and or err);
@is_and_or{@_} = (1) x scalar(@_);
@_ = qw(&& || and or : ? . + - * /);
@is_chain_operator{@_} = (1) x scalar(@_);
@_ =
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless while until for foreach);
@is_block_without_semicolon{@_} = (1) x scalar(@_);
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
@_ = qw" R } ) ] ";
@is_closing_type{@_} = (1) x scalar(@_);
@_ = qw" { ( [ ";
@is_opening_token{@_} = (1) x scalar(@_);
@_ = qw" } ) ] ";
@is_closing_token{@_} = (1) x scalar(@_);
}
use constant WS_YES => 1;
use constant WS_OPTIONAL => 0;
use constant WS_NO => -1;
use constant NO_BREAK => 10000;
use constant VERY_STRONG => 100;
use constant STRONG => 2.1;
use constant NOMINAL => 1.1;
use constant WEAK => 0.8;
use constant VERY_WEAK => 0.55;
use constant UNDEFINED_INDEX => -1;
use constant MAX_NAG_MESSAGES => 6;
use constant TYPE_SEQUENCE_INCREMENT => 4;
{
my $_count = 0;
sub get_count { $_count; }
sub _increment_count { ++$_count }
sub _decrement_count { --$_count }
}
sub trim {
$_[0] =~ s/\s+$//;
$_[0] =~ s/^\s+//;
return $_[0];
}
sub split_words {
my ($str) = @_;
return unless $str;
$str =~ s/\s+$//;
$str =~ s/^\s+//;
return split( /\s+/, $str );
}
sub warning {
if ($logger_object) {
$logger_object->warning(@_);
}
}
sub complain {
if ($logger_object) {
$logger_object->complain(@_);
}
}
sub write_logfile_entry {
if ($logger_object) {
$logger_object->write_logfile_entry(@_);
}
}
sub black_box {
if ($logger_object) {
$logger_object->black_box(@_);
}
}
sub report_definite_bug {
if ($logger_object) {
$logger_object->report_definite_bug();
}
}
sub get_saw_brace_error {
if ($logger_object) {
$logger_object->get_saw_brace_error();
}
}
sub we_are_at_the_last_line {
if ($logger_object) {
$logger_object->we_are_at_the_last_line();
}
}
sub write_diagnostics {
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics(@_);
}
}
sub get_added_semicolon_count {
my $self = shift;
return $added_semicolon_count;
}
sub DESTROY {
$_[0]->_decrement_count();
}
sub new {
my $class = shift;
my %defaults = (
sink_object => undef,
diagnostics_object => undef,
logger_object => undef,
);
my %args = ( %defaults, @_ );
$logger_object = $args{logger_object};
$diagnostics_object = $args{diagnostics_object};
my $sink_object = $args{sink_object};
$file_writer_object =
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
$gnu_position_predictor = 0; $max_gnu_stack_index = 0;
$max_gnu_item_index = -1;
$gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
@gnu_item_list = ();
$last_output_indentation = 0;
$last_indentation_written = 0;
$last_unadjusted_indentation = 0;
$last_leading_token = "";
$saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
$saw_END_or_DATA_ = 0;
@block_type_to_go = ();
@type_sequence_to_go = ();
@container_environment_to_go = ();
@bond_strength_to_go = ();
@forced_breakpoint_to_go = ();
@lengths_to_go = (); @levels_to_go = ();
@matching_token_to_go = ();
@mate_index_to_go = ();
@nesting_blocks_to_go = ();
@ci_levels_to_go = ();
@nesting_depth_to_go = (0);
@nobreak_to_go = ();
@old_breakpoint_to_go = ();
@tokens_to_go = ();
@types_to_go = ();
@leading_spaces_to_go = ();
@reduced_spaces_to_go = ();
@dont_align = ();
@has_broken_sublist = ();
@want_comma_break = ();
@ci_stack = ("");
$first_tabbing_disagreement = 0;
$last_tabbing_disagreement = 0;
$tabbing_disagreement_count = 0;
$in_tabbing_disagreement = 0;
$input_line_tabbing = undef;
$last_line_type = "";
$last_last_line_leading_level = 0;
$last_line_leading_level = 0;
$last_line_leading_type = '#';
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_last_nonblank_token = ';';
$last_last_nonblank_type = ';';
$last_nonblank_block_type = "";
$last_output_level = 0;
$looking_for_else = 0;
$embedded_tab_count = 0;
$first_embedded_tab_at = 0;
$last_embedded_tab_at = 0;
$deleted_semicolon_count = 0;
$first_deleted_semicolon_at = 0;
$last_deleted_semicolon_at = 0;
$added_semicolon_count = 0;
$first_added_semicolon_at = 0;
$last_added_semicolon_at = 0;
$last_line_had_side_comment = 0;
$is_static_block_comment = 0;
%postponed_breakpoint = ();
%block_leading_text = ();
%block_opening_line_number = ();
$csc_new_statement_ok = 1;
%saved_opening_indentation = ();
$in_format_skipping_section = 0;
reset_block_text_accumulator();
prepare_for_new_input_lines();
$vertical_aligner_object =
Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
$logger_object, $diagnostics_object );
if ( $rOpts->{'entab-leading-whitespace'} ) {
write_logfile_entry(
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
);
}
elsif ( $rOpts->{'tabs'} ) {
write_logfile_entry("Indentation will be with a tab character\n");
}
else {
write_logfile_entry(
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
}
$formatter_self = {};
bless $formatter_self, $class;
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
return $formatter_self;
}
sub prepare_for_new_input_lines {
$gnu_sequence_number++; %last_gnu_equals = ();
%gnu_comma_count = ();
%gnu_arrow_count = ();
$line_start_index_to_go = 0;
$max_gnu_item_index = UNDEFINED_INDEX;
$index_max_forced_break = UNDEFINED_INDEX;
$max_index_to_go = UNDEFINED_INDEX;
$last_nonblank_index_to_go = UNDEFINED_INDEX;
$last_nonblank_type_to_go = '';
$last_nonblank_token_to_go = '';
$last_last_nonblank_index_to_go = UNDEFINED_INDEX;
$last_last_nonblank_type_to_go = '';
$last_last_nonblank_token_to_go = '';
$forced_breakpoint_count = 0;
$forced_breakpoint_undo_count = 0;
$rbrace_follower = undef;
$lengths_to_go[0] = 0;
$old_line_count_in_batch = 1;
$comma_count_in_batch = 0;
$starting_in_quote = 0;
destroy_one_line_block();
}
sub write_line {
my $self = shift;
my ($line_of_tokens) = @_;
my $line_type = $line_of_tokens->{_line_type};
my $input_line = $line_of_tokens->{_line_text};
if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
$file_writer_object->reset_consecutive_blank_lines();
if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
}
if ( $line_type eq 'CODE' ) {
if ( $input_line !~ /^\s*$/ ) {
my $output_line_number =
$vertical_aligner_object->get_output_line_number();
black_box( $line_of_tokens, $output_line_number );
}
print_line_of_tokens($line_of_tokens);
}
else {
my $skip_line = 0;
my $tee_line = 0;
if ( $line_type =~ /^POD/ ) {
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
if ( !$skip_line
&& $line_type eq 'POD_START'
&& $last_line_type !~ /^(END|DATA)$/ )
{
want_blank_line();
}
}
elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
$file_writer_object->reset_consecutive_blank_lines();
$saw_END_or_DATA_ = 1;
}
if ( !$skip_line ) {
if ($tee_line) { $file_writer_object->tee_on() }
write_unindented_line($input_line);
if ($tee_line) { $file_writer_object->tee_off() }
}
}
$last_line_type = $line_type;
}
sub create_one_line_block {
$index_start_one_line_block = $_[0];
$semicolons_before_block_self_destruct = $_[1];
}
sub destroy_one_line_block {
$index_start_one_line_block = UNDEFINED_INDEX;
$semicolons_before_block_self_destruct = 0;
}
sub leading_spaces_to_go {
return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
}
sub get_SPACES {
my $indentation = shift;
return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}
sub get_RECOVERABLE_SPACES {
my $indentation = shift;
return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}
sub get_AVAILABLE_SPACES_to_go {
my $item = $leading_spaces_to_go[ $_[0] ];
return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
}
sub new_lp_indentation_item {
my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
my $index = 0;
if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
my $item = Perl::Tidy::IndentationItem->new(
$spaces, $level,
$ci_level, $available_spaces,
$index, $gnu_sequence_number,
$align_paren, $max_gnu_stack_index,
$line_start_index_to_go,
);
if ( $level >= 0 ) {
$gnu_item_list[$max_gnu_item_index] = $item;
}
return $item;
}
sub set_leading_whitespace {
my ( $level, $ci_level, $in_continued_quote ) = @_;
if ( $rOpts_brace_left_and_indent
&& $max_index_to_go == 0
&& $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
{
$ci_level++;
}
if ( $level < 0 ) { $level = 0 }
unless ($rOpts_line_up_parentheses) {
my $space_count =
$ci_level * $rOpts_continuation_indentation +
$level * $rOpts_indent_columns;
my $ci_spaces =
( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
if ($in_continued_quote) {
$space_count = 0;
$ci_spaces = 0;
}
$leading_spaces_to_go[$max_index_to_go] = $space_count;
$reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
return;
}
if ($in_continued_quote) {
my $space_count = 0;
my $available_space = 0;
$level = -1; $leading_spaces_to_go[$max_index_to_go] =
$reduced_spaces_to_go[$max_index_to_go] =
new_lp_indentation_item( $space_count, $level, $ci_level,
$available_space, 0 );
return;
}
my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
my $type = $types_to_go[$max_index_to_go];
my $token = $tokens_to_go[$max_index_to_go];
my $total_depth = $nesting_depth_to_go[$max_index_to_go];
if ( $type eq '{' || $type eq '(' ) {
$gnu_comma_count{ $total_depth + 1 } = 0;
$gnu_arrow_count{ $total_depth + 1 } = 0;
my $last_equals = $last_gnu_equals{$total_depth};
if ( $last_equals && $last_equals > $line_start_index_to_go ) {
my $i_test = $last_equals;
if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
my $test_position = total_line_length( $i_test, $max_index_to_go );
if (
$gnu_position_predictor > $half_maximum_line_length
|| (
$gnu_position_predictor > $half_maximum_line_length / 2
&& (
$old_breakpoint_to_go[$last_equals]
|| ( $last_equals > 0
&& $old_breakpoint_to_go[ $last_equals - 1 ] )
|| ( $last_equals > 1
&& $types_to_go[ $last_equals - 1 ] eq 'b'
&& $old_breakpoint_to_go[ $last_equals - 2 ] )
)
)
)
{
$line_start_index_to_go = $i_test + 1;
$gnu_position_predictor = $test_position;
}
}
}
if ( $level < $current_level || $ci_level < $current_ci_level ) {
my ( $lev, $ci_lev );
while (1) {
if ($max_gnu_stack_index) {
$gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
my $available_spaces =
$gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
my $comma_count = 0;
my $arrow_count = 0;
if ( $type eq '}' || $type eq ')' ) {
$comma_count = $gnu_comma_count{$total_depth};
$arrow_count = $gnu_arrow_count{$total_depth};
$comma_count = 0 unless $comma_count;
$arrow_count = 0 unless $arrow_count;
}
$gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
$gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
if ( $available_spaces > 0 ) {
if ( $comma_count <= 0 || $arrow_count > 0 ) {
my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
my $seqno =
$gnu_stack[$max_gnu_stack_index]
->get_SEQUENCE_NUMBER();
if ( $gnu_sequence_number != $seqno
|| $i > $max_gnu_item_index )
{
warning(
"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
);
report_definite_bug();
}
else {
if ( $arrow_count == 0 ) {
$gnu_item_list[$i]
->permanently_decrease_AVAILABLE_SPACES(
$available_spaces);
}
else {
$gnu_item_list[$i]
->tentatively_decrease_AVAILABLE_SPACES(
$available_spaces);
}
my $j;
for (
$j = $i + 1 ;
$j <= $max_gnu_item_index ;
$j++
)
{
$gnu_item_list[$j]
->decrease_SPACES($available_spaces);
}
}
}
}
--$max_gnu_stack_index;
$lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
$ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
if ( $lev <= $level && $ci_lev <= $ci_level ) {
$space_count =
$gnu_stack[$max_gnu_stack_index]->get_SPACES();
$current_level = $lev;
$current_ci_level = $ci_lev;
last;
}
}
else {
warning(
"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
);
report_definite_bug();
last;
}
}
}
if ( $level > $current_level || $ci_level > $current_ci_level ) {
my $standard_increment =
( $level - $current_level ) * $rOpts_indent_columns +
( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
my $available_space = 0;
my $align_paren = 0;
my $excess = 0;
if ( $max_gnu_stack_index == 0 ) {
$space_count = $level * $rOpts_indent_columns;
}
elsif ($last_nonblank_block_type) {
$space_count += $standard_increment;
}
elsif ( $last_nonblank_type ne '{' ) {
$space_count += $standard_increment;
}
else {
$space_count = $gnu_position_predictor;
my $min_gnu_indentation =
$gnu_stack[$max_gnu_stack_index]->get_SPACES();
$available_space = $space_count - $min_gnu_indentation;
if ( $available_space >= $standard_increment ) {
$min_gnu_indentation += $standard_increment;
}
elsif ( $available_space > 1 ) {
$min_gnu_indentation += $available_space + 1;
}
elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
$min_gnu_indentation += 2;
}
else {
$min_gnu_indentation += 1;
}
}
else {
$min_gnu_indentation += $standard_increment;
}
$available_space = $space_count - $min_gnu_indentation;
if ( $available_space < 0 ) {
$space_count = $min_gnu_indentation;
$available_space = 0;
}
$align_paren = 1;
}
if ( $types_to_go[$max_index_to_go] ne 'b' ) {
$gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
++$max_gnu_stack_index;
$gnu_stack[$max_gnu_stack_index] =
new_lp_indentation_item( $space_count, $level, $ci_level,
$available_space, $align_paren );
if ( $available_space > 0
&& $space_count > $half_maximum_line_length )
{
$gnu_stack[$max_gnu_stack_index]
->tentatively_decrease_AVAILABLE_SPACES($available_space);
}
}
}
if ( $type eq '=>' ) {
$gnu_arrow_count{$total_depth}++;
$last_gnu_equals{$total_depth} = $max_index_to_go;
}
elsif ( $type eq ',' ) {
$gnu_comma_count{$total_depth}++;
}
elsif ( $is_assignment{$type} ) {
$last_gnu_equals{$total_depth} = $max_index_to_go;
}
if ( $type ne 'b' ) {
if (
$max_index_to_go == 1 && $types_to_go[0] eq 'b'
|| $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
|| ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
|| ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
|| $type =~ /^([\.]|\|\||\&\&)$/
|| ( $last_nonblank_type_to_go eq '}'
&& $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
|| ( $last_nonblank_type_to_go eq 'k'
&& ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
|| ( $type eq 'k'
&& $is_if_unless_and_or_last_next_redo_return{$token} )
|| (
$is_assignment{$last_nonblank_type_to_go}
&& (
$last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
|| $gnu_position_predictor > $half_maximum_line_length
)
)
)
{
check_for_long_gnu_style_lines();
$line_start_index_to_go = $max_index_to_go;
if ( $line_start_index_to_go > 0 ) {
if ( $last_nonblank_type_to_go eq 'k' ) {
if ( $want_break_before{$last_nonblank_token_to_go} ) {
$line_start_index_to_go--;
}
}
elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
$line_start_index_to_go--;
}
}
}
}
if ( $max_index_to_go > $line_start_index_to_go ) {
$gnu_position_predictor =
total_line_length( $line_start_index_to_go, $max_index_to_go );
}
else {
$gnu_position_predictor = $space_count +
token_sequence_length( $max_index_to_go, $max_index_to_go );
}
$leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
$reduced_spaces_to_go[$max_index_to_go] =
( $max_gnu_stack_index > 0 && $ci_level )
? $gnu_stack[ $max_gnu_stack_index - 1 ]
: $gnu_stack[$max_gnu_stack_index];
return;
}
sub check_for_long_gnu_style_lines {
return unless ($rOpts_line_up_parentheses);
return if ( $max_gnu_item_index == UNDEFINED_INDEX );
my $spaces_needed =
$gnu_position_predictor - $rOpts_maximum_line_length + 2;
return if ( $spaces_needed < 0 );
my @candidates = ();
my $i;
for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
my $item = $gnu_item_list[$i];
next if ( $item->get_CLOSED() >= 0 );
my $available_spaces = $item->get_AVAILABLE_SPACES();
if ( $available_spaces > 0 ) {
push( @candidates, [ $i, $available_spaces ] );
}
}
return unless (@candidates);
@candidates = sort { $b->[1] <=> $a->[1] } @candidates;
my $candidate;
foreach $candidate (@candidates) {
my ( $i, $available_spaces ) = @{$candidate};
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
$gnu_item_list[$i]->decrease_AVAILABLE_SPACES($deleted_spaces);
my $i_debug = $i;
for ( ; $i <= $max_gnu_item_index ; $i++ ) {
my $old_spaces = $gnu_item_list[$i]->get_SPACES();
if ( $old_spaces > $deleted_spaces ) {
$gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
}
else {
my $level = $gnu_item_list[$i_debug]->get_LEVEL();
my $ci_level = $gnu_item_list[$i_debug]->get_CI_LEVEL();
my $old_level = $gnu_item_list[$i]->get_LEVEL();
my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$ci_level\n"
);
report_definite_bug();
}
}
$gnu_position_predictor -= $deleted_spaces;
$spaces_needed -= $deleted_spaces;
last unless ( $spaces_needed > 0 );
}
}
sub finish_lp_batch {
return unless ($rOpts_line_up_parentheses);
return if ( $max_gnu_item_index == UNDEFINED_INDEX );
my $i;
for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) {
my $item = $gnu_item_list[$i];
next if ( $item->get_CLOSED() >= 0 );
my $available_spaces = $item->get_AVAILABLE_SPACES();
if ( $available_spaces > 0 ) {
$gnu_item_list[$i]
->tentatively_decrease_AVAILABLE_SPACES($available_spaces);
foreach ( $i + 1 .. $max_gnu_item_index ) {
$gnu_item_list[$_]->decrease_SPACES($available_spaces);
}
}
}
return;
}
sub reduce_lp_indentation {
my ( $i, $spaces_wanted ) = @_;
my $deleted_spaces = 0;
my $item = $leading_spaces_to_go[$i];
my $available_spaces = $item->get_AVAILABLE_SPACES();
if (
$available_spaces > 0
&& ( ( $spaces_wanted <= $available_spaces )
|| !$item->get_HAVE_CHILD() )
)
{
$deleted_spaces =
$item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
}
return $deleted_spaces;
}
sub token_sequence_length {
my $ifirst = shift;
my $ilast = shift;
return 0 if ( $ilast < 0 || $ifirst > $ilast );
return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
}
sub total_line_length {
my $ifirst = shift;
my $ilast = shift;
if ( $ifirst < 0 ) { $ifirst = 0 }
return leading_spaces_to_go($ifirst) +
token_sequence_length( $ifirst, $ilast );
}
sub excess_line_length {
my $ifirst = shift;
my $ilast = shift;
if ( $ifirst < 0 ) { $ifirst = 0 }
return leading_spaces_to_go($ifirst) +
token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
}
sub finish_formatting {
my $self = shift;
flush();
$file_writer_object->decrement_output_line_number()
; we_are_at_the_last_line();
if ( $added_semicolon_count > 0 ) {
my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
my $what =
( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
write_logfile_entry("$added_semicolon_count $what added:\n");
write_logfile_entry(
" $first at input line $first_added_semicolon_at\n");
if ( $added_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_added_semicolon_at\n");
}
write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
write_logfile_entry("\n");
}
if ( $deleted_semicolon_count > 0 ) {
my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
my $what =
( $deleted_semicolon_count > 1 )
? "semicolons were"
: "semicolon was";
write_logfile_entry(
"$deleted_semicolon_count unnecessary $what deleted:\n");
write_logfile_entry(
" $first at input line $first_deleted_semicolon_at\n");
if ( $deleted_semicolon_count > 1 ) {
write_logfile_entry(
" Last at input line $last_deleted_semicolon_at\n");
}
write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
write_logfile_entry("\n");
}
if ( $embedded_tab_count > 0 ) {
my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
my $what =
( $embedded_tab_count > 1 )
? "quotes or patterns"
: "quote or pattern";
write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
write_logfile_entry(
"This means the display of this script could vary with device or software\n"
);
write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
if ( $embedded_tab_count > 1 ) {
write_logfile_entry(
" Last at input line $last_embedded_tab_at\n");
}
write_logfile_entry("\n");
}
if ($first_tabbing_disagreement) {
write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
);
}
if ($in_tabbing_disagreement) {
write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
);
}
else {
if ($last_tabbing_disagreement) {
write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
);
}
else {
write_logfile_entry("No indentation disagreement seen\n");
}
}
write_logfile_entry("\n");
$vertical_aligner_object->report_anything_unusual();
$file_writer_object->report_line_length_errors();
}
sub check_options {
($rOpts) = @_;
my ( $tabbing_string, $tab_msg );
make_static_block_comment_pattern();
make_static_side_comment_pattern();
make_closing_side_comment_prefix();
make_closing_side_comment_list_pattern();
$format_skipping_pattern_begin =
make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
$format_skipping_pattern_end =
make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
if ( $rOpts->{'closing-side-comments'} ) {
if ( !$rOpts->{'closing-side-comment-warnings'} ) {
$rOpts->{'delete-closing-side-comments'} = 1;
}
}
elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
if ( $rOpts->{'delete-closing-side-comments'} ) {
$rOpts->{'delete-closing-side-comments'} = 0;
$rOpts->{'closing-side-comments'} = 1;
$rOpts->{'closing-side-comment-interval'} = 100000000;
}
}
make_bli_pattern();
make_block_brace_vertical_tightness_pattern();
if ( $rOpts->{'line-up-parentheses'} ) {
if ( $rOpts->{'indent-only'}
|| !$rOpts->{'add-newlines'}
|| !$rOpts->{'delete-old-newlines'} )
{
warn <<EOM;
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints. This isn't possible
with these flags. Sometimes an acceptable workaround is to use -wocb=3
-----------------------------------------------------------------------
EOM
$rOpts->{'line-up-parentheses'} = 0;
}
}
# At present, tabs are not compatable with the line-up-parentheses style
# (it would be possible to entab the total leading whitespace
# just prior to writing the line, if desired).
if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
warn <<EOM;
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
# Likewise, tabs are not compatable with outdenting..
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
warn <<EOM;
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
warn <<EOM;
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
$rOpts->{'tabs'} = 0;
}
if ( !$rOpts->{'space-for-semicolon'} ) {
$want_left_space{'f'} = -1;
}
if ( $rOpts->{'space-terminal-semicolon'} ) {
$want_left_space{';'} = 1;
}
# implement outdenting preferences for keywords
%outdent_keyword = ();
unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
@_ = qw(next last redo goto return); # defaults
}
# FUTURE: if not a keyword, assume that it is an identifier
foreach (@_) {
if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
$outdent_keyword{$_} = 1;
}
else {
warn "ignoring '$_' in -okwl list; not a perl keyword";
}
}
# implement user whitespace preferences
if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
@want_left_space{@_} = (1) x scalar(@_);
}
if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
@want_right_space{@_} = (1) x scalar(@_);
}
if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
@want_left_space{@_} = (-1) x scalar(@_);
}
if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
@want_right_space{@_} = (-1) x scalar(@_);
}
if ( $rOpts->{'dump-want-left-space'} ) {
dump_want_left_space(*STDOUT);
exit 1;
}
if ( $rOpts->{'dump-want-right-space'} ) {
dump_want_right_space(*STDOUT);
exit 1;
}
# default keywords for which space is introduced before an opening paren
# (at present, including them messes up vertical alignment)
@_ = qw(my local our and or err eq ne if else elsif until
unless while for foreach return switch case given when);
@space_after_keyword{@_} = (1) x scalar(@_);
# allow user to modify these defaults
if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
@space_after_keyword{@_} = (1) x scalar(@_);
}
if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
@space_after_keyword{@_} = (0) x scalar(@_);
}
# implement user break preferences
my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
. : ? && || and or err xor
);
my $break_after = sub {
foreach my $tok (@_) {
if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
};
my $break_before = sub {
foreach my $tok (@_) {
my $lbs = $left_bond_strength{$tok};
my $rbs = $right_bond_strength{$tok};
if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
( $lbs, $rbs );
}
}
};
$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
$break_before->(@all_operators)
if ( $rOpts->{'break-before-all-operators'} );
$break_after->( split_words( $rOpts->{'want-break-after'} ) );
$break_before->( split_words( $rOpts->{'want-break-before'} ) );
# make note if breaks are before certain key types
%want_break_before = ();
foreach my $tok ( @all_operators, ',' ) {
$want_break_before{$tok} =
$left_bond_strength{$tok} < $right_bond_strength{$tok};
}
# Coordinate ?/: breaks, which must be similar
if ( !$want_break_before{':'} ) {
$want_break_before{'?'} = $want_break_before{':'};
$right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
$left_bond_strength{'?'} = NO_BREAK;
}
# Define here tokens which may follow the closing brace of a do statement
# on the same line, as in:
# } while ( $something);
@_ = qw(until while unless if ; : );
push @_, ',';
@is_do_follower{@_} = (1) x scalar(@_);
# These tokens may follow the closing brace of an if or elsif block.
# In other words, for cuddled else we want code to look like:
# } elsif ( $something) {
# } else {
if ( $rOpts->{'cuddled-else'} ) {
@_ = qw(else elsif);
@is_if_brace_follower{@_} = (1) x scalar(@_);
}
else {
%is_if_brace_follower = ();
}
# nothing can follow the closing curly of an else { } block:
%is_else_brace_follower = ();
# what can follow a multi-line anonymous sub definition closing curly:
@_ = qw# ; : => or and && || ~~ !~~ ) #;
push @_, ',';
@is_anon_sub_brace_follower{@_} = (1) x scalar(@_);
# what can follow a one-line anonynomous sub closing curly:
# one-line anonumous subs also have ']' here...
# see tk3.t and PP.pm
@_ = qw# ; : => or and && || ) ] ~~ !~~ #;
push @_, ',';
@is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);
# What can follow a closing curly of a block
# which is not an if/elsif/else/do/sort/map/grep/eval/sub
# Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
@_ = qw# ; : => or and && || ) #;
push @_, ',';
# allow cuddled continue if cuddled else is specified
if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }
@is_other_brace_follower{@_} = (1) x scalar(@_);
$right_bond_strength{'{'} = WEAK;
$left_bond_strength{'{'} = VERY_STRONG;
# make -l=0 equal to -l=infinite
if ( !$rOpts->{'maximum-line-length'} ) {
$rOpts->{'maximum-line-length'} = 1000000;
}
# make -lbl=0 equal to -lbl=infinite
if ( !$rOpts->{'long-block-line-count'} ) {
$rOpts->{'long-block-line-count'} = 1000000;
}
my $ole = $rOpts->{'output-line-ending'};
if ($ole) {
my %endings = (
dos => "\015\012",
win => "\015\012",
mac => "\015",
unix => "\012",
);
$ole = lc $ole;
unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
my $str = join " ", keys %endings;
die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
}
if ( $rOpts->{'preserve-line-endings'} ) {
warn "Ignoring -ple; conflicts with -ole\n";
$rOpts->{'preserve-line-endings'} = undef;
}
}
# hashes used to simplify setting whitespace
%tightness = (
'{' => $rOpts->{'brace-tightness'},
'}' => $rOpts->{'brace-tightness'},
'(' => $rOpts->{'paren-tightness'},
')' => $rOpts->{'paren-tightness'},
'[' => $rOpts->{'square-bracket-tightness'},
']' => $rOpts->{'square-bracket-tightness'},
);
%matching_token = (
'{' => '}',
'(' => ')',
'[' => ']',
'?' => ':',
);
# frequently used parameters
$rOpts_add_newlines = $rOpts->{'add-newlines'};
$rOpts_add_whitespace = $rOpts->{'add-whitespace'};
$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
$rOpts_block_brace_vertical_tightness =
$rOpts->{'block-brace-vertical-tightness'};
$rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};
$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
$rOpts_break_at_old_ternary_breakpoints =
$rOpts->{'break-at-old-ternary-breakpoints'};
$rOpts_break_at_old_comma_breakpoints =
$rOpts->{'break-at-old-comma-breakpoints'};
$rOpts_break_at_old_keyword_breakpoints =
$rOpts->{'break-at-old-keyword-breakpoints'};
$rOpts_break_at_old_logical_breakpoints =
$rOpts->{'break-at-old-logical-breakpoints'};
$rOpts_closing_side_comment_else_flag =
$rOpts->{'closing-side-comment-else-flag'};
$rOpts_closing_side_comment_maximum_text =
$rOpts->{'closing-side-comment-maximum-text'};
$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
$rOpts_cuddled_else = $rOpts->{'cuddled-else'};
$rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
$rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};
$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_short_concatenation_item_length =
$rOpts->{'short-concatenation-item-length'};
$rOpts_swallow_optional_blank_lines =
$rOpts->{'swallow-optional-blank-lines'};
$rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};
$rOpts_format_skipping = $rOpts->{'format-skipping'};
$rOpts_space_function_paren = $rOpts->{'space-function-paren'};
$rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};
$rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
$half_maximum_line_length = $rOpts_maximum_line_length / 2;
# Note that both opening and closing tokens can access the opening
# and closing flags of their container types.
%opening_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness'},
'{' => $rOpts->{'brace-vertical-tightness'},
'[' => $rOpts->{'square-bracket-vertical-tightness'},
')' => $rOpts->{'paren-vertical-tightness'},
'}' => $rOpts->{'brace-vertical-tightness'},
']' => $rOpts->{'square-bracket-vertical-tightness'},
);
%closing_vertical_tightness = (
'(' => $rOpts->{'paren-vertical-tightness-closing'},
'{' => $rOpts->{'brace-vertical-tightness-closing'},
'[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
')' => $rOpts->{'paren-vertical-tightness-closing'},
'}' => $rOpts->{'brace-vertical-tightness-closing'},
']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
);
# assume flag for '>' same as ')' for closing qw quotes
%closing_token_indentation = (
')' => $rOpts->{'closing-paren-indentation'},
'}' => $rOpts->{'closing-brace-indentation'},
']' => $rOpts->{'closing-square-bracket-indentation'},
'>' => $rOpts->{'closing-paren-indentation'},
);
%opening_token_right = (
'(' => $rOpts->{'opening-paren-right'},
'{' => $rOpts->{'opening-hash-brace-right'},
'[' => $rOpts->{'opening-square-bracket-right'},
);
%stack_opening_token = (
'(' => $rOpts->{'stack-opening-paren'},
'{' => $rOpts->{'stack-opening-hash-brace'},
'[' => $rOpts->{'stack-opening-square-bracket'},
);
%stack_closing_token = (
')' => $rOpts->{'stack-closing-paren'},
'}' => $rOpts->{'stack-closing-hash-brace'},
']' => $rOpts->{'stack-closing-square-bracket'},
);
}
sub make_static_block_comment_pattern {
# create the pattern used to identify static block comments
$static_block_comment_pattern = '^\s*
if ( $rOpts->{'static-block-comment-prefix'} ) {
my $prefix = $rOpts->{'static-block-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = $prefix;
if ( $prefix !~ /^\^ if ( $prefix !~ /^ die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
}
$pattern = '^\s*' . $prefix;
}
eval "'##'=~/$pattern/";
if ($@) {
die
"ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_block_comment_pattern = $pattern;
}
}
sub make_format_skipping_pattern {
my ( $opt_name, $default ) = @_;
my $param = $rOpts->{$opt_name};
unless ($param) { $param = $default }
$param =~ s/^\s*//;
if ( $param !~ /^ die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
}
my $pattern = '^' . $param . '\s';
eval "'#'=~/$pattern/";
if ($@) {
die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
}
return $pattern;
}
sub make_closing_side_comment_list_pattern {
$closing_side_comment_list_pattern = '^\w+';
if ( defined( $rOpts->{'closing-side-comment-list'} )
&& $rOpts->{'closing-side-comment-list'} )
{
$closing_side_comment_list_pattern =
make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
}
}
sub make_bli_pattern {
if ( defined( $rOpts->{'brace-left-and-indent-list'} )
&& $rOpts->{'brace-left-and-indent-list'} )
{
$bli_list_string = $rOpts->{'brace-left-and-indent-list'};
}
$bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}
sub make_block_brace_vertical_tightness_pattern {
$block_brace_vertical_tightness_pattern =
'^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
&& $rOpts->{'block-brace-vertical-tightness-list'} )
{
$block_brace_vertical_tightness_pattern =
make_block_pattern( '-bbvtl',
$rOpts->{'block-brace-vertical-tightness-list'} );
}
}
sub make_block_pattern {
my ( $abbrev, $string ) = @_;
my @list = split_words($string);
my @words = ();
my %seen;
for my $i (@list) {
next if $seen{$i};
$seen{$i} = 1;
if ( $i eq 'sub' ) {
}
elsif ( $i eq ':' ) {
push @words, '\w+:';
}
elsif ( $i =~ /^\w/ ) {
push @words, $i;
}
else {
warn "unrecognized block type $i after $abbrev, ignoring\n";
}
}
my $pattern = '(' . join( '|', @words ) . ')$';
if ( $seen{'sub'} ) {
$pattern = '(' . $pattern . '|sub)';
}
$pattern = '^' . $pattern;
return $pattern;
}
sub make_static_side_comment_pattern {
$static_side_comment_pattern = '^##';
if ( $rOpts->{'static-side-comment-prefix'} ) {
my $prefix = $rOpts->{'static-side-comment-prefix'};
$prefix =~ s/^\s*//;
my $pattern = '^' . $prefix;
eval "'##'=~/$pattern/";
if ($@) {
die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
}
$static_side_comment_pattern = $pattern;
}
}
sub make_closing_side_comment_prefix {
my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
my $csc_prefix_pattern;
if ( !defined($csc_prefix) ) {
$csc_prefix = '## end';
$csc_prefix_pattern = '^##\s+end';
}
else {
my $test_csc_prefix = $csc_prefix;
if ( $test_csc_prefix !~ /^ $test_csc_prefix = '#' . $test_csc_prefix;
}
my $test_csc_prefix_pattern = $test_csc_prefix;
$test_csc_prefix_pattern =~ s/([^
$test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
$test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
eval "'##'=~/$test_csc_prefix_pattern/";
if ($@) {
report_definite_bug();
warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n";
warn "Please consider using a simpler -cscp prefix\n";
warn "Using default -cscp instead; please check output\n";
}
else {
$csc_prefix = $test_csc_prefix;
$csc_prefix_pattern = $test_csc_prefix_pattern;
}
}
$rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
$closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}
sub dump_want_left_space {
my $fh = shift;
local $" = "\n";
print $fh <<EOM;
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
foreach ( sort keys %want_left_space ) {
print $fh "$_\t$want_left_space{$_}\n";
}
}
sub dump_want_right_space {
my $fh = shift;
local $" = "\n";
print $fh <<EOM;
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
foreach ( sort keys %want_right_space ) {
print $fh "$_\t$want_right_space{$_}\n";
}
}
{
my %is_sort_grep_map;
my %is_for_foreach;
BEGIN {
@_ = qw(sort grep map);
@is_sort_grep_map{@_} = (1) x scalar(@_);
@_ = qw(for foreach);
@is_for_foreach{@_} = (1) x scalar(@_);
}
sub is_essential_whitespace {
my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
my $result =
( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
|| ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
|| ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )
|| ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )
|| ( ( $tokenl eq '-' )
&& ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )
|| ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
|| ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
&& ( $tokenr =~ /^[a-zA-Z_]/ ) )
|| ( $tokenr =~ /^\<\</ )
|| ( $is_digraph{ $tokenl . $tokenr } )
|| ( $is_trigraph{ $tokenl . $tokenr } )
|| ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )
|| ( ( $tokenl =~ /^\$[\$\
|| ( $typel eq 'Z' )
|| ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )
|| ( $tokenr eq '('
&& $typel eq 'w'
&& $typell eq 'k'
&& $tokenll eq 'use' )
|| ( $typel eq 'Y' && $tokenr eq '(' )
|| ( $typel eq 'h' )
|| ( ( $typer =~ /^(pp|mm)$/ ) && ( $tokenl !~ /^[\;\{\(\[]/ ) )
|| ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )
|| (
$tokenl eq 'my'
&& $is_for_foreach{$tokenll}
&& $tokenr =~ /^\$/
)
|| ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )
|| ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )
|| ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )
; return $result;
}
}
sub set_white_space_flag {
BEGIN {
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
@_ = qw" R } ) ] ";
@is_closing_type{@_} = (1) x scalar(@_);
my @spaces_both_sides = qw"
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
&&= ||= //= <=> A k f w F n C Y U G v
";
my @spaces_left_side = qw"
t ! ~ m p { \ h pp mm Z j
";
push( @spaces_left_side, '#' );
my @spaces_right_side = qw"
; } ) ] R J ++ -- **=
";
push( @spaces_right_side, ',' ); @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
@want_right_space{@spaces_both_sides} =
(1) x scalar(@spaces_both_sides);
@want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
@want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
@want_left_space{@spaces_right_side} =
(-1) x scalar(@spaces_right_side);
@want_right_space{@spaces_right_side} =
(1) x scalar(@spaces_right_side);
$want_left_space{'L'} = WS_NO;
$want_left_space{'->'} = WS_NO;
$want_right_space{'->'} = WS_NO;
$want_left_space{'**'} = WS_NO;
$want_right_space{'**'} = WS_NO;
$binary_ws_rules{'i'}{'L'} = WS_NO;
$binary_ws_rules{'i'}{'{'} = WS_YES;
$binary_ws_rules{'k'}{'{'} = WS_YES;
$binary_ws_rules{'U'}{'{'} = WS_YES;
$binary_ws_rules{'i'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'L'} = WS_NO;
$binary_ws_rules{'R'}{'{'} = WS_NO;
$binary_ws_rules{'t'}{'L'} = WS_NO;
$binary_ws_rules{'t'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'L'} = WS_NO;
$binary_ws_rules{'}'}{'{'} = WS_NO;
$binary_ws_rules{'$'}{'L'} = WS_NO;
$binary_ws_rules{'$'}{'{'} = WS_NO;
$binary_ws_rules{'@'}{'L'} = WS_NO;
$binary_ws_rules{'@'}{'{'} = WS_NO;
$binary_ws_rules{'='}{'L'} = WS_YES;
$binary_ws_rules{']'}{'L'} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{')'}{'{'} = WS_YES;
$binary_ws_rules{')'}{'['} = WS_NO;
$binary_ws_rules{']'}{'['} = WS_NO;
$binary_ws_rules{']'}{'{'} = WS_NO;
$binary_ws_rules{'}'}{'['} = WS_NO;
$binary_ws_rules{'R'}{'['} = WS_NO;
$binary_ws_rules{']'}{'++'} = WS_NO;
$binary_ws_rules{']'}{'--'} = WS_NO;
$binary_ws_rules{')'}{'++'} = WS_NO;
$binary_ws_rules{')'}{'--'} = WS_NO;
$binary_ws_rules{'R'}{'++'} = WS_NO;
$binary_ws_rules{'R'}{'--'} = WS_NO;
$binary_ws_rules{'i'}{'Q'} = WS_YES;
$binary_ws_rules{'n'}{'('} = WS_YES;
$binary_ws_rules{'i'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'('} = WS_NO;
$binary_ws_rules{'w'}{'{'} = WS_YES;
}
my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
my ( $last_token, $last_type, $last_block_type, $token, $type,
$block_type );
my (@white_space_flag);
my $j_tight_closing_paren = -1;
if ( $max_index_to_go >= 0 ) {
$token = $tokens_to_go[$max_index_to_go];
$type = $types_to_go[$max_index_to_go];
$block_type = $block_type_to_go[$max_index_to_go];
}
else {
$token = ' ';
$type = 'b';
$block_type = '';
}
my ( $j, $ws );
for ( $j = 0 ; $j <= $jmax ; $j++ ) {
if ( $$rtoken_type[$j] eq 'b' ) {
$white_space_flag[$j] = WS_OPTIONAL;
next;
}
$ws = undef;
$last_token = $token;
$last_type = $type;
$last_block_type = $block_type;
$token = $$rtokens[$j];
$type = $$rtoken_type[$j];
$block_type = $$rblock_type[$j];
if ( $is_opening_type{$last_type} ) {
$j_tight_closing_paren = -1;
if ( $token eq $matching_token{$last_token} ) {
if ($block_type) {
$ws = WS_YES;
}
else {
$ws = WS_NO;
}
}
else {
my $tightness;
if ( $last_type eq '{'
&& $last_token eq '{'
&& $last_block_type )
{
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$last_token} }
if ( $tightness <= 0 ) {
$ws = WS_YES;
}
elsif ( $tightness > 1 ) {
$ws = WS_NO;
}
else {
my $j_here = $j;
++$j_here
if ( $token eq '-'
&& $last_token eq '{'
&& $$rtoken_type[ $j + 1 ] eq 'w' );
my $j_next =
( $$rtoken_type[ $j_here + 1 ] eq 'b' )
? $j_here + 2
: $j_here + 1;
my $tok_next = $$rtokens[$j_next];
my $type_next = $$rtoken_type[$j_next];
if (
$tok_next eq $matching_token{$last_token}
&& $last_token ne $token
)
{
$j_tight_closing_paren = $j_next;
$ws = WS_NO;
}
else {
$ws = WS_YES;
}
}
}
} my $ws_1 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
if ( $is_closing_type{$type} ) {
if ( $j == $j_tight_closing_paren ) {
$j_tight_closing_paren = -1;
$ws = WS_NO;
}
else {
if ( !defined($ws) ) {
my $tightness;
if ( $type eq '}' && $token eq '}' && $block_type ) {
$tightness = $rOpts_block_brace_tightness;
}
else { $tightness = $tightness{$token} }
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
}
}
}
my $ws_2 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
if ( !defined($ws) ) {
$ws = $binary_ws_rules{$last_type}{$type};
}
my $ws_3 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
if ( $token eq '(' ) {
if ( $last_type eq '}' ) { $ws = WS_YES }
elsif ( $last_type eq 'k' ) {
$ws = WS_NO
unless ( $rOpts_space_keyword_paren
|| $space_after_keyword{$last_token} );
}
elsif (( $last_type =~ /^[wU]$/ )
|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
{
$ws = WS_NO unless ($rOpts_space_function_paren);
}
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
$ws = WS_YES;
}
elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
$ws = WS_NO;
}
}
elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
$ws = WS_OPTIONAL;
}
if ( $type eq '{' ) {
if ( $last_token eq 'sub' ) {
$ws = WS_YES;
}
if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
$ws = WS_NO;
}
}
elsif ( $type eq 'i' ) {
if ( $token =~ /^\-\>/ ) {
$ws = WS_NO;
}
}
elsif ( $type eq 'w' || $type eq 'C' ) {
$ws = WS_OPTIONAL if $last_type eq '-';
if ( $token =~ /^\-\>/ ) {
$ws = WS_NO;
}
}
elsif ( $type eq 'm' || $type eq '-' ) {
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
}
elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
if (
$type ne '#'
&& ( ( $last_type eq 'Z' && $last_token ne '_' )
|| $last_type eq 'h' )
)
{
$ws = WS_OPTIONAL;
}
my $ws_4 = $ws
if FORMATTER_DEBUG_FLAG_WHITE;
if ( !defined($ws) ) {
my $wl = $want_left_space{$type};
my $wr = $want_right_space{$last_type};
if ( !defined($wl) ) { $wl = 0 }
if ( !defined($wr) ) { $wr = 0 }
$ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
}
if ( !defined($ws) ) {
$ws = 0;
write_diagnostics(
"WS flag is undefined for tokens $last_token $token\n");
}
if ( $ws == 0 && $j == 0 ) { $ws = 1 }
if ( ( $ws == 0 )
&& $j > 0
&& $j < $jmax
&& ( $last_type !~ /^[Zh]$/ ) )
{
write_diagnostics(
"WS flag is zero for tokens $last_token $token\n");
}
$white_space_flag[$j] = $ws;
FORMATTER_DEBUG_FLAG_WHITE && do {
my $str = substr( $last_token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
if ( !defined($ws_1) ) { $ws_1 = "*" }
if ( !defined($ws_2) ) { $ws_2 = "*" }
if ( !defined($ws_3) ) { $ws_3 = "*" }
if ( !defined($ws_4) ) { $ws_4 = "*" }
print
"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
};
}
return \@white_space_flag;
}
{
my $rtoken_type;
my $rtokens;
my $rlevels;
my $rslevels;
my $rblock_type;
my $rcontainer_type;
my $rcontainer_environment;
my $rtype_sequence;
my $input_line;
my $rnesting_tokens;
my $rci_levels;
my $rnesting_blocks;
my $in_quote;
my $python_indentation_level;
my $block_type;
my $ci_level;
my $container_environment;
my $container_type;
my $in_continued_quote;
my $level;
my $nesting_blocks;
my $no_internal_newlines;
my $slevel;
my $token;
my $type;
my $type_sequence;
sub extract_token {
my $j = shift;
$token = $$rtokens[$j];
$type = $$rtoken_type[$j];
$block_type = $$rblock_type[$j];
$container_type = $$rcontainer_type[$j];
$container_environment = $$rcontainer_environment[$j];
$type_sequence = $$rtype_sequence[$j];
$level = $$rlevels[$j];
$slevel = $$rslevels[$j];
$nesting_blocks = $$rnesting_blocks[$j];
$ci_level = $$rci_levels[$j];
}
{
my @saved_token;
sub save_current_token {
@saved_token = (
$block_type, $ci_level,
$container_environment, $container_type,
$in_continued_quote, $level,
$nesting_blocks, $no_internal_newlines,
$slevel, $token,
$type, $type_sequence,
);
}
sub restore_current_token {
(
$block_type, $ci_level,
$container_environment, $container_type,
$in_continued_quote, $level,
$nesting_blocks, $no_internal_newlines,
$slevel, $token,
$type, $type_sequence,
) = @saved_token;
}
}
sub store_token_to_go {
my $flag = $no_internal_newlines;
if ( $_[0] ) { $flag = 1 }
$tokens_to_go[ ++$max_index_to_go ] = $token;
$types_to_go[$max_index_to_go] = $type;
$nobreak_to_go[$max_index_to_go] = $flag;
$old_breakpoint_to_go[$max_index_to_go] = 0;
$forced_breakpoint_to_go[$max_index_to_go] = 0;
$block_type_to_go[$max_index_to_go] = $block_type;
$type_sequence_to_go[$max_index_to_go] = $type_sequence;
$container_environment_to_go[$max_index_to_go] = $container_environment;
$nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
$ci_levels_to_go[$max_index_to_go] = $ci_level;
$mate_index_to_go[$max_index_to_go] = -1;
$matching_token_to_go[$max_index_to_go] = '';
$bond_strength_to_go[$max_index_to_go] = 0;
$levels_to_go[$max_index_to_go] = $level;
$nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
$lengths_to_go[ $max_index_to_go + 1 ] =
$lengths_to_go[$max_index_to_go] + length($token);
set_leading_whitespace( $level, $ci_level, $in_continued_quote );
if ( $type ne 'b' ) {
$last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
$last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
$last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
$last_nonblank_index_to_go = $max_index_to_go;
$last_nonblank_type_to_go = $type;
$last_nonblank_token_to_go = $token;
if ( $type eq ',' ) {
$comma_count_in_batch++;
}
}
FORMATTER_DEBUG_FLAG_STORE && do {
my ( $a, $b, $c ) = caller();
print
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
};
}
sub insert_new_token_to_go {
save_current_token();
( $token, $type, $slevel, $no_internal_newlines ) = @_;
if ( $max_index_to_go == UNDEFINED_INDEX ) {
warning("code bug: bad call to insert_new_token_to_go\n");
}
$level = $levels_to_go[$max_index_to_go];
$nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
$ci_level = $ci_levels_to_go[$max_index_to_go];
$container_environment = $container_environment_to_go[$max_index_to_go];
$in_continued_quote = 0;
$block_type = "";
$type_sequence = "";
store_token_to_go();
restore_current_token();
return;
}
sub print_line_of_tokens {
my $line_of_tokens = shift;
$input_line_number = $line_of_tokens->{_line_number};
$rtoken_type = $line_of_tokens->{_rtoken_type};
$rtokens = $line_of_tokens->{_rtokens};
$rlevels = $line_of_tokens->{_rlevels};
$rslevels = $line_of_tokens->{_rslevels};
$rblock_type = $line_of_tokens->{_rblock_type};
$rcontainer_type = $line_of_tokens->{_rcontainer_type};
$rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
$rtype_sequence = $line_of_tokens->{_rtype_sequence};
$input_line = $line_of_tokens->{_line_text};
$rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
$rci_levels = $line_of_tokens->{_rci_levels};
$rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
$in_continued_quote = $starting_in_quote =
$line_of_tokens->{_starting_in_quote};
$in_quote = $line_of_tokens->{_ending_in_quote};
$ending_in_quote = $in_quote;
$python_indentation_level =
$line_of_tokens->{_python_indentation_level};
my $j;
my $j_next;
my $jmax;
my $next_nonblank_token;
my $next_nonblank_token_type;
my $rwhite_space_flag;
$jmax = @$rtokens - 1;
$block_type = "";
$container_type = "";
$container_environment = "";
$type_sequence = "";
$no_internal_newlines = 1 - $rOpts_add_newlines;
$is_static_block_comment = 0;
if ($in_continued_quote) {
if ( $jmax <= 0 ) {
if ( ( $input_line =~ "\t" ) ) {
note_embedded_tab();
}
write_unindented_line("$input_line");
$last_line_had_side_comment = 0;
return;
}
if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
warning(
"-chk: please check this line for extra leading whitespace\n"
);
}
}
if ($in_format_skipping_section) {
write_unindented_line("$input_line");
$last_line_had_side_comment = 0;
if ( $jmax == 0
&& $$rtoken_type[0] eq '#'
&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
{
$in_format_skipping_section = 0;
write_logfile_entry("Exiting formatting skip section\n");
}
return;
}
if ( $rOpts_format_skipping
&& $jmax == 0
&& $$rtoken_type[0] eq '#'
&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
{
flush();
$in_format_skipping_section = 1;
write_logfile_entry("Entering formatting skip section\n");
write_unindented_line("$input_line");
$last_line_had_side_comment = 0;
return;
}
if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
if ( $jmax < 0 ) {
if ( !$rOpts_swallow_optional_blank_lines ) {
flush();
$file_writer_object->write_blank_code_line();
$last_line_leading_type = 'b';
}
$last_line_had_side_comment = 0;
return;
}
my $is_static_block_comment_without_leading_space = 0;
if ( $jmax == 0
&& $$rtoken_type[0] eq '#'
&& $rOpts->{'static-block-comments'}
&& $input_line =~ /$static_block_comment_pattern/o )
{
$is_static_block_comment = 1;
$is_static_block_comment_without_leading_space =
substr( $input_line, 0, 1 ) eq '#';
}
if (
$jmax == 0
&& $$rtoken_type[0] eq '#'
&& $input_line =~ /^\ line \s+ (\d+) \s*
(?:\s("?)([^"]+)\2)? \s*
$/x
)
{
$is_static_block_comment = 1;
$is_static_block_comment_without_leading_space = 1;
}
if (
$jmax == 0
&& $$rtoken_type[0] eq '#' && $last_line_had_side_comment && $input_line =~ /^\s/ && !$is_static_block_comment && $rOpts->{'hanging-side-comments'} )
{
unshift @$rtoken_type, 'q';
unshift @$rtokens, '';
unshift @$rlevels, $$rlevels[0];
unshift @$rslevels, $$rslevels[0];
unshift @$rblock_type, '';
unshift @$rcontainer_type, '';
unshift @$rcontainer_environment, '';
unshift @$rtype_sequence, '';
unshift @$rnesting_tokens, $$rnesting_tokens[0];
unshift @$rci_levels, $$rci_levels[0];
unshift @$rnesting_blocks, $$rnesting_blocks[0];
$jmax = 1;
}
$last_line_had_side_comment =
( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
if ( $rOpts->{'delete-block-comments'} ) { return }
if ( $rOpts->{'tee-block-comments'} ) {
$file_writer_object->tee_on();
}
destroy_one_line_block();
output_line_to_go();
if (
$last_line_leading_type !~ /^[ && $rOpts->{'blanks-before-comments'} && !
$is_static_block_comment )
{
flush(); $file_writer_object->write_blank_code_line();
$last_line_leading_type = 'b';
}
$$rtokens[0] =~ s/\s*$//; # trim right end
if (
$rOpts->{'indent-block-comments'}
&& ( !$rOpts->{'indent-spaced-block-comments'}
|| $input_line =~ /^\s+/ )
&& !$is_static_block_comment_without_leading_space
)
{
extract_token(0);
store_token_to_go();
output_line_to_go();
}
else {
flush(); $file_writer_object->write_code_line( $$rtokens[0] . "\n" );
$last_line_leading_type = '#';
}
if ( $rOpts->{'tee-block-comments'} ) {
$file_writer_object->tee_off();
}
return;
}
my $structural_indentation_level = $$rlevels[0];
compare_indentation_levels( $python_indentation_level,
$structural_indentation_level )
unless ( $python_indentation_level < 0
|| ( $$rci_levels[0] > 0 )
|| ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
);
my $is_VERSION_statement = 0;
if (
!$saw_VERSION_in_this_file
&& $input_line =~ /VERSION/ && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
)
{
$saw_VERSION_in_this_file = 1;
$is_VERSION_statement = 1;
write_logfile_entry("passing VERSION line; -npvl deactivates\n");
$no_internal_newlines = 1;
}
if ( $rOpts->{'indent-only'} ) {
flush();
trim($input_line);
extract_token(0);
$token = $input_line;
$type = 'q';
$block_type = "";
$container_type = "";
$container_environment = "";
$type_sequence = "";
store_token_to_go();
output_line_to_go();
return;
}
push( @$rtokens, ' ', ' ' ); push( @$rtoken_type, 'b', 'b' );
($rwhite_space_flag) =
set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
if ( $max_index_to_go >= 0 ) {
$old_line_count_in_batch++;
if (
is_essential_whitespace(
$last_last_nonblank_token,
$last_last_nonblank_type,
$tokens_to_go[$max_index_to_go],
$types_to_go[$max_index_to_go],
$$rtokens[0],
$$rtoken_type[0]
)
)
{
my $slevel = $$rslevels[0];
insert_new_token_to_go( ' ', 'b', $slevel,
$no_internal_newlines );
}
}
if ($looking_for_else) {
unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
write_logfile_entry("(No else block)\n");
}
$looking_for_else = 0;
}
if ( ( $semicolons_before_block_self_destruct == 0 )
&& ( $max_index_to_go >= 0 )
&& ( $types_to_go[$max_index_to_go] eq ';' )
&& ( $$rtokens[0] ne '}' ) )
{
destroy_one_line_block();
output_line_to_go();
}
$type = 'b';
$token = "";
foreach $j ( 0 .. $jmax ) {
extract_token($j);
if ( $type eq '#' ) {
$token =~ s/\s*$//;
if (
$rOpts->{'delete-side-comments'}
|| ( $rOpts->{'delete-closing-side-comments'}
&& $token =~ /$closing_side_comment_prefix_pattern/o
&& $last_nonblank_block_type =~
/$closing_side_comment_list_pattern/o )
)
{
if ( $types_to_go[$max_index_to_go] eq 'b' ) {
unstore_token_to_go();
}
last;
}
}
if ( $rbrace_follower && $type ne 'b' ) {
unless ( $rbrace_follower->{$token} ) {
output_line_to_go();
}
$rbrace_follower = undef;
}
$j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
$next_nonblank_token = $$rtokens[$j_next];
$next_nonblank_token_type = $$rtoken_type[$j_next];
if ( $type =~ /^[wit]$/ ) {
if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
$token =~ s/\s*//g;
}
if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
}
elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
elsif ( $type eq 'n' ) {
if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
}
elsif ( $type eq 'Q' ) {
note_embedded_tab() if ( $token =~ "\t" );
if (
$token =~ /^(s|tr|y|m|\/)/
&& $last_nonblank_token =~ /^(=|==|!=)$/
&& $last_last_nonblank_type eq 'i'
&& $last_last_nonblank_token =~ /^\$/
&& $next_nonblank_token =~ /^[; \)\}]$/
&& !(
$types_to_go[0] eq 'k'
&& $tokens_to_go[0] =~ /^(my|our|local)$/
)
)
{
my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
complain(
"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
);
}
}
elsif ( $type eq 'q' ) {
$token =~ s/\s*$//;
note_embedded_tab() if ( $token =~ "\t" );
}
if ( ( $type ne 'b' )
&& ( $max_index_to_go >= 0 )
&& ( $types_to_go[$max_index_to_go] ne 'b' )
&& $rOpts_add_whitespace )
{
my $ws = $$rwhite_space_flag[$j];
if ( $ws == 1 ) {
insert_new_token_to_go( ' ', 'b', $slevel,
$no_internal_newlines );
}
}
my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
my $is_opening_BLOCK =
( $type eq '{'
&& $token eq '{'
&& $block_type
&& $block_type ne 't' );
my $is_closing_BLOCK =
( $type eq '}'
&& $token eq '}'
&& $block_type
&& $block_type ne 't' );
if ( $side_comment_follows
&& !$is_opening_BLOCK
&& !$is_closing_BLOCK )
{
$no_internal_newlines = 1;
}
if ($is_opening_BLOCK) {
store_token_to_go($side_comment_follows);
my $too_long =
starting_one_line_block( $j, $jmax, $level, $slevel,
$ci_level, $rtokens, $rtoken_type, $rblock_type );
clear_breakpoint_undo_stack();
my $keyword_on_same_line = 1;
if ( ( $max_index_to_go >= 0 )
&& ( $last_nonblank_type eq ')' ) )
{
if ( $block_type =~ /^(if|else|elsif)$/
&& ( $tokens_to_go[0] eq '}' )
&& $rOpts_cuddled_else )
{
$keyword_on_same_line = 1;
}
elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
{
$keyword_on_same_line = 0;
}
}
my $want_break =
$block_type !~ /^sub/
? $rOpts->{'opening-brace-on-new-line'}
: $block_type !~ /^sub\W*$/
? $rOpts->{'opening-sub-brace-on-new-line'}
: 0;
if (
$want_break
&& $index_start_one_line_block == UNDEFINED_INDEX
|| ( !$keyword_on_same_line
&& !$rOpts->{'opening-brace-always-on-right'} )
)
{
unless ($no_internal_newlines) {
unstore_token_to_go();
output_line_to_go();
store_token_to_go($side_comment_follows);
}
}
if ($side_comment_follows) { $no_internal_newlines = 1 }
unless ($no_internal_newlines) {
output_line_to_go();
}
}
elsif ($is_closing_BLOCK) {
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
if (
excess_line_length( $index_start_one_line_block,
$max_index_to_go ) >= 0
|| ( $semicolons_before_block_self_destruct == 0
&& $last_nonblank_type ne ';' )
)
{
destroy_one_line_block();
}
}
unless ( $no_internal_newlines
|| $index_start_one_line_block != UNDEFINED_INDEX )
{
if (
( $max_index_to_go > 0 )
&& ( $last_nonblank_type ne ';' )
&& ( $block_type !~ /^[\{\};]$/ )
&& ( !$is_sort_map_grep{$block_type} )
&& $rOpts->{'add-semicolons'}
)
{
save_current_token();
$token = ';';
$type = ';';
$level = $levels_to_go[$max_index_to_go];
$slevel = $nesting_depth_to_go[$max_index_to_go];
$nesting_blocks =
$nesting_blocks_to_go[$max_index_to_go];
$ci_level = $ci_levels_to_go[$max_index_to_go];
$block_type = "";
$container_type = "";
$container_environment = "";
$type_sequence = "";
if ( $types_to_go[$max_index_to_go] eq 'b' ) {
unstore_token_to_go();
}
store_token_to_go();
note_added_semicolon();
restore_current_token();
}
output_line_to_go();
}
if ($side_comment_follows) { $no_internal_newlines = 1 }
store_token_to_go();
my $is_one_line_block = 0;
my $keep_going = 0;
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
$is_one_line_block =
$types_to_go[$index_start_one_line_block];
undo_forced_breakpoint_stack(0);
set_nobreaks( $index_start_one_line_block,
$max_index_to_go - 1 );
destroy_one_line_block();
if (
$is_block_without_semicolon{$block_type}
&& $next_nonblank_token ne ';'
)
{
output_line_to_go() unless ($no_internal_newlines);
}
}
if ( $block_type eq 'do' ) {
$rbrace_follower = \%is_do_follower;
}
elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
$rbrace_follower = \%is_if_brace_follower;
}
elsif ( $block_type eq 'else' ) {
$rbrace_follower = \%is_else_brace_follower;
}
elsif ($is_sort_map_grep_eval{$block_type}
|| $is_one_line_block eq 'G' )
{
$rbrace_follower = undef;
$keep_going = 1;
}
elsif ( $block_type =~ /^sub\W*$/ ) {
if ($is_one_line_block) {
$rbrace_follower = \%is_anon_sub_1_brace_follower;
}
else {
$rbrace_follower = \%is_anon_sub_brace_follower;
}
}
else {
$rbrace_follower = \%is_other_brace_follower;
}
if ( $block_type eq 'elsif' ) {
if ( $next_nonblank_token_type eq 'b' ) { $looking_for_else = 1; }
else {
unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
write_logfile_entry("No else block :(\n");
}
}
}
if ($keep_going) {
}
elsif ( ( $next_nonblank_token_type eq 'b' )
&& $rOpts_add_newlines )
{
unless ($rbrace_follower) {
output_line_to_go() unless ($no_internal_newlines);
}
}
elsif ($rbrace_follower) {
unless ( $rbrace_follower->{$next_nonblank_token} ) {
output_line_to_go() unless ($no_internal_newlines);
}
$rbrace_follower = undef;
}
else {
output_line_to_go() unless ($no_internal_newlines);
}
}
elsif ( $type eq ';' ) {
$semicolons_before_block_self_destruct--;
if (
( $semicolons_before_block_self_destruct < 0 )
|| ( $semicolons_before_block_self_destruct == 0
&& $next_nonblank_token_type !~ /^[b\}]$/ )
)
{
destroy_one_line_block();
}
if (
(
$last_nonblank_token eq '}'
&& (
$is_block_without_semicolon{
$last_nonblank_block_type}
|| $last_nonblank_block_type =~ /^sub\s+\w/
|| $last_nonblank_block_type =~ /^\w+:$/ )
)
|| $last_nonblank_type eq ';'
)
{
if (
$rOpts->{'delete-semicolons'}
&& ( $next_nonblank_token_type ne '#' )
)
{
note_deleted_semicolon();
output_line_to_go()
unless ( $no_internal_newlines
|| $index_start_one_line_block != UNDEFINED_INDEX );
next;
}
else {
write_logfile_entry("Extra ';'\n");
}
}
store_token_to_go();
output_line_to_go()
unless ( $no_internal_newlines
|| ( $rOpts_keep_interior_semicolons && $j < $jmax )
|| ( $next_nonblank_token eq '}' ) );
}
elsif ( $type eq 'h' ) {
$no_internal_newlines =
1; destroy_one_line_block();
store_token_to_go();
}
else {
if ( $type eq 'b' ) {
$token = ' ' if $rOpts_add_whitespace;
my $ws = $$rwhite_space_flag[ $j + 1 ];
if ( ( defined($ws) && $ws == -1 )
|| $rOpts_delete_old_whitespace )
{
next
unless is_essential_whitespace(
$last_last_nonblank_token,
$last_last_nonblank_type,
$tokens_to_go[$max_index_to_go],
$types_to_go[$max_index_to_go],
$$rtokens[ $j + 1 ],
$$rtoken_type[ $j + 1 ]
);
}
}
store_token_to_go();
}
if ( $type ne '#' && $type ne 'b' ) {
$last_last_nonblank_token = $last_nonblank_token;
$last_last_nonblank_type = $last_nonblank_type;
$last_nonblank_token = $token;
$last_nonblank_type = $type;
$last_nonblank_block_type = $block_type;
}
$in_continued_quote = 0;
}
if (
( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
|| $in_quote
|| $is_VERSION_statement
|| ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
|| !$rOpts->{'delete-old-newlines'}
)
{
destroy_one_line_block();
output_line_to_go();
}
if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
$old_breakpoint_to_go[$max_index_to_go] = 1;
}
} }
sub output_line_to_go {
FORMATTER_DEBUG_FLAG_OUTPUT && do {
my ( $a, $b, $c ) = caller;
write_diagnostics(
"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
);
my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
write_diagnostics("$output_str\n");
};
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
set_forced_breakpoint($max_index_to_go);
return;
}
my $cscw_block_comment;
$cscw_block_comment = add_closing_side_comment()
if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
match_opening_and_closing_tokens();
finish_lp_batch();
my $saw_good_break = 0; if (
$block_type_to_go[$max_index_to_go]
&& !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
)
{
my $lev = $nesting_depth_to_go[$max_index_to_go];
for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
last if ( $levels_to_go[$i] < $lev ); next if ( $levels_to_go[$i] > $lev );
if ( $block_type_to_go[$i] ) {
if ( $tokens_to_go[$i] eq '}' ) {
set_forced_breakpoint($i);
$saw_good_break = 1;
}
}
elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
}
}
my $imin = 0;
my $imax = $max_index_to_go;
if ( $max_index_to_go >= 0 ) {
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
}
if ( $imin <= $imax ) {
if ( $last_line_leading_type !~ /^[ my $want_blank = 0;
my $leading_token = $tokens_to_go[$imin];
my $leading_type = $types_to_go[$imin];
if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
$want_blank = ( $rOpts->{'blanks-before-subs'} )
&& (
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
$imax ) !~ /^[\;\}]$/
);
}
elsif ($leading_token =~ /^(package\s)/
&& $leading_type eq 'i' )
{
$want_blank = ( $rOpts->{'blanks-before-subs'} );
}
if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
$want_blank = ( $rOpts->{'blanks-before-subs'} )
&& (
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
$imax ) ne '}'
);
}
elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
&& $leading_type eq 'k' )
{
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
if ( !defined($lc) ) { $lc = 0 }
$want_blank =
$rOpts->{'blanks-before-blocks'}
&& $lc >= $rOpts->{'long-block-line-count'}
&& $file_writer_object->get_consecutive_nonblank_lines() >=
$rOpts->{'long-block-line-count'}
&& (
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
$imax ) ne '}'
);
}
if ($want_blank) {
Perl::Tidy::VerticalAligner::flush();
$file_writer_object->write_blank_code_line();
}
}
$last_last_line_leading_level = $last_line_leading_level;
$last_line_leading_level = $levels_to_go[$imin];
if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
$last_line_leading_type = $types_to_go[$imin];
if ( $last_line_leading_level == $last_last_line_leading_level
&& $last_line_leading_type ne 'b'
&& $last_line_leading_type ne '#'
&& defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
{
$nonblank_lines_at_depth[$last_line_leading_level]++;
}
else {
$nonblank_lines_at_depth[$last_line_leading_level] = 1;
}
FORMATTER_DEBUG_FLAG_FLUSH && do {
my ( $package, $file, $line ) = caller;
print
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
};
pad_array_to_go();
my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
if (
$max_index_to_go > 0
&& (
$is_long_line
|| $old_line_count_in_batch > 1
|| is_unbalanced_batch()
|| (
$comma_count_in_batch
&& ( $rOpts_maximum_fields_per_table > 0
|| $rOpts_comma_arrow_breakpoints == 0 )
)
)
)
{
$saw_good_break ||= scan_list();
}
my ( $ri_first, $ri_last );
if (
!$rOpts_add_newlines
|| (
!$forced_breakpoint_count
&& !$saw_good_break
&& !$is_long_line
)
)
{
@$ri_first = ($imin);
@$ri_last = ($imax);
}
else {
( $ri_first, $ri_last, my $colon_count ) =
set_continuation_breaks($saw_good_break);
break_all_chain_tokens( $ri_first, $ri_last );
break_equals( $ri_first, $ri_last );
if ( $rOpts->{'recombine'} ) {
( $ri_first, $ri_last ) =
recombine_breakpoints( $ri_first, $ri_last );
}
insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
}
my $do_not_pad = 0;
if ($rOpts_line_up_parentheses) {
$do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
}
send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
}
prepare_for_new_input_lines();
if ($cscw_block_comment) {
flush();
$file_writer_object->write_code_line( $cscw_block_comment . "\n" );
}
}
sub note_added_semicolon {
$last_added_semicolon_at = $input_line_number;
if ( $added_semicolon_count == 0 ) {
$first_added_semicolon_at = $last_added_semicolon_at;
}
$added_semicolon_count++;
write_logfile_entry("Added ';' here\n");
}
sub note_deleted_semicolon {
$last_deleted_semicolon_at = $input_line_number;
if ( $deleted_semicolon_count == 0 ) {
$first_deleted_semicolon_at = $last_deleted_semicolon_at;
}
$deleted_semicolon_count++;
write_logfile_entry("Deleted unnecessary ';'\n"); }
sub note_embedded_tab {
$embedded_tab_count++;
$last_embedded_tab_at = $input_line_number;
if ( !$first_embedded_tab_at ) {
$first_embedded_tab_at = $last_embedded_tab_at;
}
if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry("Embedded tabs in quote or pattern\n");
}
}
sub starting_one_line_block {
my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
$rblock_type )
= @_;
destroy_one_line_block();
my $i_start = 0;
if ( $max_index_to_go < 0 ) {
warning("program bug: store_token_to_go called incorrectly\n");
report_definite_bug();
}
else {
if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
return 0;
}
}
my $block_type = $$rblock_type[$j];
if ( $block_type =~ /^[\{\}\;\:]$/ ) {
$i_start = $max_index_to_go;
}
elsif ( $last_last_nonblank_token_to_go eq ')' ) {
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
unless ( $tokens_to_go[$i_start] eq $block_type ) {
return 0;
}
}
elsif (
( $last_last_nonblank_token_to_go eq $block_type )
|| ( $block_type =~ /^sub/
&& $last_last_nonblank_token_to_go =~ /^sub/ )
)
{
$i_start = $last_last_nonblank_index_to_go;
}
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
$i_start = $index_max_forced_break + 1;
if ( $types_to_go[$i_start] eq 'b' ) {
$i_start++;
}
unless ( $tokens_to_go[$i_start] eq $block_type ) {
return 0;
}
}
else {
return 1;
}
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
my $i;
if ( $pos > $rOpts_maximum_line_length ) {
return 1;
}
for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
else { $pos += length( $$rtokens[$i] ) }
if ( $pos > $rOpts_maximum_line_length ) {
return 0;
}
elsif ($$rtokens[$i] eq '{'
&& $$rtoken_type[$i] eq '{'
&& $$rblock_type[$i] )
{
return 0;
}
elsif ($$rtokens[$i] eq '}'
&& $$rtoken_type[$i] eq '}'
&& $$rblock_type[$i] )
{
my $i_nonblank =
( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
if ( $$rtoken_type[$i_nonblank] eq '#' ) {
$pos += length( $$rtokens[$i_nonblank] );
if ( $i_nonblank > $i + 1 ) {
$pos += length( $$rtokens[ $i + 1 ] );
}
if ( $pos > $rOpts_maximum_line_length ) {
return 0;
}
}
create_one_line_block( $i_start, 20 );
return 0;
}
else {
}
}
if ( $is_sort_map_grep_eval{$block_type} ) {
create_one_line_block( $i_start, 1 );
}
return 0;
}
sub unstore_token_to_go {
if ( $max_index_to_go > 0 ) {
$max_index_to_go--;
}
else {
$max_index_to_go = UNDEFINED_INDEX;
}
}
sub want_blank_line {
flush();
$file_writer_object->want_blank_line();
}
sub write_unindented_line {
flush();
$file_writer_object->write_line( $_[0] );
}
sub undo_lp_ci {
my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
my $max_line = @$ri_first - 1;
return unless $max_line > $line_open;
my $lev_start = $levels_to_go[$i_start];
my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
my $n;
my $line_1 = 1 + $line_open;
for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
my $ibeg = $$ri_first[$n];
my $iend = $$ri_last[$n];
if ( $ibeg eq $closing_index ) { $n--; last }
return if ( $lev_start != $levels_to_go[$ibeg] );
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
last if ( $closing_index <= $iend );
}
my $continuation_line_count = $n - $line_open;
@ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
(0) x ($continuation_line_count);
@leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
@reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
}
sub set_logical_padding {
my ( $ri_first, $ri_last ) = @_;
my $max_line = @$ri_first - 1;
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
$tok_next, $type_next, $has_leading_op_next, $has_leading_op );
foreach $line ( 0 .. $max_line - 1 ) {
$ibeg = $$ri_first[$line];
$iend = $$ri_last[$line];
$ibeg_next = $$ri_first[ $line + 1 ];
$tok_next = $tokens_to_go[$ibeg_next];
$type_next = $types_to_go[$ibeg_next];
$has_leading_op_next = ( $tok_next =~ /^\w/ )
? $is_chain_operator{$tok_next} : $is_chain_operator{$type_next};
next unless ($has_leading_op_next);
next
if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
$ipad = undef;
if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
if ( $line > 0 ) {
next if $has_leading_op;
next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
my $ok_comma;
if ( $types_to_go[$iendm] eq ','
&& $line == 1
&& $max_line > 2 )
{
my $ibeg_next_next = $$ri_first[ $line + 2 ];
my $tok_next_next = $tokens_to_go[$ibeg_next_next];
$ok_comma = $tok_next_next eq $tok_next;
}
next
unless (
$is_assignment{ $types_to_go[$iendm] }
|| $ok_comma
|| ( $nesting_depth_to_go[$ibegm] <
$nesting_depth_to_go[$ibeg] )
|| ( $types_to_go[$iendm] eq 'k'
&& $tokens_to_go[$iendm] eq 'return' )
);
$ipad = $ibeg;
}
else {
next if $starting_in_quote;
if ( $types_to_go[$ibeg] eq '}' ) {
}
else {
next
if ( $nesting_depth_to_go[$ibeg] !=
$nesting_depth_to_go[$ibeg_next] );
if ( $max_line > 1 ) {
my $leading_token = $tokens_to_go[$ibeg_next];
my $tokens_differ;
next if ( $leading_token eq '.' );
my $count = 1;
foreach my $l ( 2 .. 3 ) {
last if ( $line + $l > $max_line );
my $ibeg_next_next = $$ri_first[ $line + $l ];
if ( $tokens_to_go[$ibeg_next_next] ne
$leading_token )
{
$tokens_differ = 1;
last;
}
$count++;
}
next if ($tokens_differ);
next if ( $count < 3 && $leading_token ne ':' );
$ipad = $ibeg;
}
else {
next;
}
}
}
}
if ( !defined($ipad) ) {
for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
next
unless ( $type_sequence_to_go[$i]
&& $mate_index_to_go[$i] > $iend );
$ipad = $i + 1;
if ( $types_to_go[$ipad] eq 'b' ) {
$ipad++;
last if ( $ipad > $iend );
}
}
last unless $ipad;
}
my $iend_next = $$ri_last[ $line + 1 ];
next
if ( $nesting_depth_to_go[ $iend_next + 1 ] >
$nesting_depth_to_go[$ipad] );
my $inext_next = $ibeg_next + 1;
if ( $types_to_go[$inext_next] eq 'b' ) {
$inext_next++;
}
my $type = $types_to_go[$ipad];
my $type_next = $types_to_go[ $ipad + 1 ];
my $logical_continuation_lines = 1;
if ( $line + 2 <= $max_line ) {
my $leading_token = $tokens_to_go[$ibeg_next];
my $ibeg_next_next = $$ri_first[ $line + 2 ];
if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
&& $nesting_depth_to_go[$ibeg_next] eq
$nesting_depth_to_go[$ibeg_next_next] )
{
$logical_continuation_lines++;
}
}
my $types_match = $types_to_go[$inext_next] eq $type;
my $matches_without_bang;
if ( !$types_match && $type eq '!' ) {
$types_match = $matches_without_bang =
$types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
}
if (
( $logical_continuation_lines > 1 && $ipad > 0 )
|| (
$types_match
&& !(
$type eq 'k'
&& $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
)
)
)
{
my $ok_to_pad = 1;
my $ibg = $$ri_first[ $line + 1 ];
my $depth = $nesting_depth_to_go[ $ibg + 1 ];
my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
my $l = $line + 1;
foreach $l ( $line + 2 .. $max_line ) {
my $ibg = $$ri_first[$l];
last
if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
|| ( $nesting_depth_to_go[$ibg] < $depth );
if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
$ok_to_pad = 0;
last;
}
}
if ( $l == $max_line ) {
my $i2 = $$ri_last[$l];
if ( $types_to_go[$i2] eq '#' ) {
my $i1 = $$ri_first[$l];
next
if (
terminal_type( \@types_to_go, \@block_type_to_go, $i1,
$i2 ) eq ','
);
}
}
if ( $types_to_go[$ibeg_next] eq 'm' ) {
$ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
}
next unless $ok_to_pad;
my $length_1 = total_line_length( $ibeg, $ipad - 1 );
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
$pad_spaces = $length_2 - $length_1;
if ($matches_without_bang) { $pad_spaces-- }
my $indentation_1 = $leading_spaces_to_go[$ibeg];
if ( ref($indentation_1) ) {
if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
$pad_spaces = 0;
}
}
}
if ( $pad_spaces < 0 ) {
if ( $pad_spaces == -1 ) {
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
$tokens_to_go[ $ipad - 1 ] = '';
}
}
$pad_spaces = 0;
}
if ( $ipad >= 0 && $pad_spaces ) {
my $length_t = total_line_length( $ibeg, $iend );
if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
$tokens_to_go[$ipad] =
' ' x $pad_spaces . $tokens_to_go[$ipad];
}
}
}
}
continue {
$iendm = $iend;
$ibegm = $ibeg;
$has_leading_op = $has_leading_op_next;
} return;
}
sub correct_lp_indentation {
my ( $ri_first, $ri_last ) = @_;
my $do_not_pad = 0;
my $max_line = @$ri_first - 1;
my ( $ibeg, $iend );
my $line;
foreach $line ( 0 .. $max_line ) {
$ibeg = $$ri_first[$line];
$iend = $$ri_last[$line];
my $i;
foreach $i ( $ibeg .. $iend ) {
my $indentation = $leading_spaces_to_go[$i];
if ( !$indentation->get_MARKED() ) {
$indentation->set_MARKED(1);
next unless ( $indentation->get_ALIGN_PAREN() );
if ( $i > $ibeg ) {
my $im = $i - 1;
if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
if ( $type_sequence_to_go[$im]
&& $mate_index_to_go[$im] <= $iend )
{
next;
}
}
if ( $line == 1 && $i == $ibeg ) {
$do_not_pad = 1;
}
my $actual_pos;
my $predicted_pos = $indentation->get_SPACES();
if ( $i > $ibeg ) {
$actual_pos = total_line_length( $ibeg, $i - 1 );
my $closing_index = $indentation->get_CLOSED();
if ( $closing_index > $iend ) {
my $ibeg_next = $$ri_first[ $line + 1 ];
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
undo_lp_ci( $line, $i, $closing_index, $ri_first,
$ri_last );
}
}
}
elsif ( $line > 0 ) {
my $ibegm = $$ri_first[ $line - 1 ];
my $iendm = $$ri_last[ $line - 1 ];
$actual_pos = total_line_length( $ibegm, $iendm );
++$actual_pos
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
}
else {
$actual_pos = $predicted_pos;
}
my $move_right = $actual_pos - $predicted_pos;
if ( $move_right == 0 ) {
$indentation->set_RECOVERABLE_SPACES($move_right);
next;
}
my $closing_index = $indentation->get_CLOSED();
if ( $closing_index < 0 ) {
$indentation->set_RECOVERABLE_SPACES($move_right);
next;
}
my $right_margin = 0;
my $have_child = $indentation->get_HAVE_CHILD();
my %saw_indentation;
my $line_count = 1;
$saw_indentation{$indentation} = $indentation;
if ( $have_child || $move_right > 0 ) {
$have_child = 0;
my $max_length = 0;
if ( $i == $ibeg ) {
$max_length = total_line_length( $ibeg, $iend );
}
my $line_t;
foreach $line_t ( $line + 1 .. $max_line ) {
my $ibeg_t = $$ri_first[$line_t];
my $iend_t = $$ri_last[$line_t];
last if ( $closing_index <= $ibeg_t );
my $indentation_t = $leading_spaces_to_go[$ibeg_t];
$saw_indentation{$indentation_t} = $indentation_t;
$line_count++;
my $length_t = total_line_length( $ibeg_t, $iend_t );
if ( $length_t > $max_length ) {
$max_length = $length_t;
}
}
$right_margin = $rOpts_maximum_line_length - $max_length;
if ( $right_margin < 0 ) { $right_margin = 0 }
}
my $first_line_comma_count =
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
my $comma_count = $indentation->get_COMMA_COUNT();
my $arrow_count = $indentation->get_ARROW_COUNT();
my $indentation_count = keys %saw_indentation;
my $is_vertically_aligned =
( $i == $ibeg
&& $first_line_comma_count > 1
&& $indentation_count == 1
&& ( $arrow_count == 0 || $arrow_count == $line_count ) );
if (
$move_right < 0
|| ( $comma_count == 0 )
|| ( $comma_count > 0 && !$is_vertically_aligned )
)
{
my $move =
( $move_right <= $right_margin )
? $move_right
: $right_margin;
foreach ( keys %saw_indentation ) {
$saw_indentation{$_}
->permanently_decrease_AVAILABLE_SPACES( -$move );
}
}
else {
$indentation->set_RECOVERABLE_SPACES($move_right);
}
}
}
}
return $do_not_pad;
}
sub flush {
destroy_one_line_block();
output_line_to_go();
Perl::Tidy::VerticalAligner::flush();
}
sub reset_block_text_accumulator {
if ($accumulating_text_for_block) {
if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
push @{$rleading_block_if_elsif_text}, $leading_block_text;
}
}
$accumulating_text_for_block = "";
$leading_block_text = "";
$leading_block_text_level = 0;
$leading_block_text_length_exceeded = 0;
$leading_block_text_line_number = 0;
$leading_block_text_line_length = 0;
}
sub set_block_text_accumulator {
my $i = shift;
$accumulating_text_for_block = $tokens_to_go[$i];
if ( $accumulating_text_for_block !~ /^els/ ) {
$rleading_block_if_elsif_text = [];
}
$leading_block_text = "";
$leading_block_text_level = $levels_to_go[$i];
$leading_block_text_line_number =
$vertical_aligner_object->get_output_line_number();
$leading_block_text_length_exceeded = 0;
$leading_block_text_line_length =
length($accumulating_text_for_block) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$leading_block_text_level * $rOpts_indent_columns + 3;
}
sub accumulate_block_text {
my $i = shift;
if ( $accumulating_text_for_block
&& !$leading_block_text_length_exceeded
&& $types_to_go[$i] ne '#' )
{
my $added_length = length( $tokens_to_go[$i] );
$added_length += 1 if $i == 0;
my $new_line_length = $leading_block_text_line_length + $added_length;
if (
length($leading_block_text) <
$rOpts_closing_side_comment_maximum_text
&& ( $new_line_length < $rOpts_maximum_line_length
|| length($leading_block_text) + $added_length <
$rOpts_closing_side_comment_maximum_text )
|| (
$tokens_to_go[$i] eq ')'
&& (
(
$i + 1 <= $max_index_to_go
&& $block_type_to_go[ $i + 1 ] eq
$accumulating_text_for_block
)
|| ( $i + 2 <= $max_index_to_go
&& $block_type_to_go[ $i + 2 ] eq
$accumulating_text_for_block )
)
)
)
{
if ( $i == 0 ) { $leading_block_text .= ' ' }
$leading_block_text .= $tokens_to_go[$i];
$leading_block_text_line_length = $new_line_length;
}
elsif ( $types_to_go[$i] ne 'b' ) {
$leading_block_text_length_exceeded = 1;
$leading_block_text .= '...';
}
}
}
{
my %is_if_elsif_else_unless_while_until_for_foreach;
BEGIN {
@_ = qw(if elsif else unless while until for foreach case when);
@is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
}
sub accumulate_csc_text {
my $block_leading_text = ""; my $rblock_leading_if_elsif_text;
my $i_block_leading_text =
-1; my $block_line_count = 100; my $terminal_type = 'b'; my $i_terminal = 0; my $terminal_block_type = "";
for my $i ( 0 .. $max_index_to_go ) {
my $type = $types_to_go[$i];
my $block_type = $block_type_to_go[$i];
my $token = $tokens_to_go[$i];
if ( $type ne '#' && $type ne 'b' ) {
$terminal_type = $type;
$terminal_block_type = $block_type;
$i_terminal = $i;
}
my $type_sequence = $type_sequence_to_go[$i];
if ( $block_type && $type_sequence ) {
if ( $token eq '}' ) {
if ( defined( $block_leading_text{$type_sequence} ) ) {
( $block_leading_text, $rblock_leading_if_elsif_text ) =
@{ $block_leading_text{$type_sequence} };
$i_block_leading_text = $i;
delete $block_leading_text{$type_sequence};
$rleading_block_if_elsif_text =
$rblock_leading_if_elsif_text;
}
if ( $accumulating_text_for_block
&& $levels_to_go[$i] <= $leading_block_text_level )
{
my $lev = $levels_to_go[$i];
reset_block_text_accumulator();
}
if ( defined( $block_opening_line_number{$type_sequence} ) )
{
my $output_line_number =
$vertical_aligner_object->get_output_line_number();
$block_line_count =
$output_line_number -
$block_opening_line_number{$type_sequence} + 1;
delete $block_opening_line_number{$type_sequence};
}
else {
}
}
elsif ( $token eq '{' ) {
my $line_number =
$vertical_aligner_object->get_output_line_number();
$block_opening_line_number{$type_sequence} = $line_number;
if ( $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
if ( $accumulating_text_for_block eq $block_type ) {
$block_leading_text{$type_sequence} = [
$leading_block_text,
$rleading_block_if_elsif_text
];
$block_opening_line_number{$type_sequence} =
$leading_block_text_line_number;
reset_block_text_accumulator();
}
else {
}
}
}
}
if ( $type eq 'k'
&& $csc_new_statement_ok
&& $is_if_elsif_else_unless_while_until_for_foreach{$token}
&& $token =~ /$closing_side_comment_list_pattern/o )
{
set_block_text_accumulator($i);
}
else {
if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
$csc_new_statement_ok =
( $block_type || $type eq 'J' || $type eq ';' );
}
if ( $type eq ';'
&& $accumulating_text_for_block
&& $levels_to_go[$i] == $leading_block_text_level )
{
reset_block_text_accumulator();
}
else {
accumulate_block_text($i);
}
}
}
if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
$block_leading_text =
make_else_csc_text( $i_terminal, $terminal_block_type,
$block_leading_text, $rblock_leading_if_elsif_text );
}
return ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count );
}
}
sub make_else_csc_text {
my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
my $csc_text = $block_leading_text;
if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
{
return $csc_text;
}
my $count = @{$rif_elsif_text};
return $csc_text unless ($count);
my $if_text = '[ if' . $rif_elsif_text->[0];
if ( $block_type eq 'else' ) {
$csc_text .= $if_text;
}
if ( $rOpts_closing_side_comment_else_flag == 0 ) {
return $csc_text;
}
my $last_elsif_text = "";
if ( $count > 1 ) {
$last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
}
my $saved_text = $csc_text;
if ( $block_type eq 'else' ) {
$csc_text .= $last_elsif_text;
}
else {
$csc_text .= ' ' . $if_text;
}
if ( $rOpts_closing_side_comment_else_flag == 2 ) {
return $csc_text;
}
my $length =
length($csc_text) +
length($block_type) +
length( $rOpts->{'closing-side-comment-prefix'} ) +
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
if ( $length > $rOpts_maximum_line_length ) {
$csc_text = $saved_text;
}
return $csc_text;
}
sub add_closing_side_comment {
my $cscw_block_comment;
my ( $terminal_type, $i_terminal, $i_block_leading_text,
$block_leading_text, $block_line_count )
= accumulate_csc_text();
my $have_side_comment = $i_terminal != $max_index_to_go;
if (
$terminal_type eq '}'
&& (
( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
|| ( $have_side_comment
&& $rOpts->{'closing-side-comment-warnings'} )
)
&& $block_type_to_go[$i_terminal] =~
/$closing_side_comment_list_pattern/o
&& $block_type_to_go[$i_terminal] ne 'sub'
&& $mate_index_to_go[$i_terminal] < 0
&& (
!$have_side_comment
|| $tokens_to_go[$max_index_to_go] =~
/$closing_side_comment_prefix_pattern/o
)
)
{
my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
if ( $i_block_leading_text == $i_terminal ) {
$token .= $block_leading_text;
}
$token =~ s/\s*$//; # trim any trailing whitespace
if ($have_side_comment) {
if ( $rOpts->{'closing-side-comment-warnings'} ) {
my $old_csc = $tokens_to_go[$max_index_to_go];
my $new_csc = $token;
$new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
my $new_trailing_dots = $1;
$old_csc =~ s/\.\.\.\s*$//;
$new_csc =~ s/\s+//g; # trim all whitespace
$old_csc =~ s/\s+//g;
if ( $block_type_to_go[$i_terminal] eq 'else' ) {
if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
}
elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
}
if ( length($new_csc) > length($old_csc) ) {
$new_csc = substr( $new_csc, 0, length($old_csc) );
}
if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
{
$old_csc = substr( $old_csc, 0, length($new_csc) );
}
if ( $new_csc ne $old_csc ) {
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
}
else {
warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
);
my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
$year += 1900;
$month += 1;
$cscw_block_comment =
"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
}
}
else {
if ( $block_line_count <
$rOpts->{'closing-side-comment-interval'} )
{
$token = undef;
unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq '#' );
unstore_token_to_go()
if ( $types_to_go[$max_index_to_go] eq 'b' );
}
}
}
$tokens_to_go[$max_index_to_go] = $token if $token;
}
else {
my $type = '#';
my $block_type = '';
my $type_sequence = '';
my $container_environment =
$container_environment_to_go[$max_index_to_go];
my $level = $levels_to_go[$max_index_to_go];
my $slevel = $nesting_depth_to_go[$max_index_to_go];
my $no_internal_newlines = 0;
my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
my $ci_level = $ci_levels_to_go[$max_index_to_go];
my $in_continued_quote = 0;
insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
insert_new_token_to_go( $token, $type, $slevel,
$no_internal_newlines );
}
}
return $cscw_block_comment;
}
sub previous_nonblank_token {
my ($i) = @_;
my $name = "";
my $im = $i - 1;
return "" if ( $im < 0 );
if ( $types_to_go[$im] eq 'b' ) { $im--; }
return "" if ( $im < 0 );
$name = $tokens_to_go[$im];
if ( $name eq '->' ) {
$im--;
if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
$name = $tokens_to_go[$im] . $name;
}
}
return $name;
}
sub send_lines_to_vertical_aligner {
my ( $ri_first, $ri_last, $do_not_pad ) = @_;
my $rindentation_list = [0];
set_vertical_alignment_markers( $ri_first, $ri_last );
my $must_flush = 0;
if ( @$ri_first > 1 ) {
if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
$must_flush = 1;
}
}
if ($must_flush) {
Perl::Tidy::VerticalAligner::flush();
}
set_logical_padding( $ri_first, $ri_last );
my $n_last_line = @$ri_first - 1;
my $in_comma_list;
for my $n ( 0 .. $n_last_line ) {
my $ibeg = $$ri_first[$n];
my $iend = $$ri_last[$n];
my ( $rtokens, $rfields, $rpatterns ) =
make_alignment_patterns( $ibeg, $iend );
my ( $indentation, $lev, $level_end, $terminal_type,
$is_semicolon_terminated, $is_outdented_line )
= set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
$ri_first, $ri_last, $rindentation_list );
my $outdent_long_lines = (
( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
|| (
$types_to_go[$ibeg] eq '#'
&& $rOpts->{'outdent-long-comments'}
&& !$is_static_block_comment
)
);
my $level_jump =
$nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
my $rvertical_tightness_flags =
set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
$ri_first, $ri_last );
Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
my $is_terminal_ternary = 0;
if ( $tokens_to_go[$ibeg] eq ':'
|| $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
{
if ( ( $terminal_type eq ';' && $level_end <= $lev )
|| ( $level_end < $lev ) )
{
$is_terminal_ternary = 1;
}
}
my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
Perl::Tidy::VerticalAligner::append_line(
$lev,
$level_end,
$indentation,
$rfields,
$rtokens,
$rpatterns,
$forced_breakpoint_to_go[$iend] || $in_comma_list,
$outdent_long_lines,
$is_terminal_ternary,
$is_semicolon_terminated,
$do_not_pad,
$rvertical_tightness_flags,
$level_jump,
);
$in_comma_list =
$tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
$do_not_pad = 0;
}
save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
}
{
my %block_type_map;
my %keyword_map;
BEGIN {
%block_type_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'if',
'default' => 'if',
'case' => 'if',
'sort' => 'map',
'grep' => 'map',
);
%keyword_map = (
'unless' => 'if',
'else' => 'if',
'elsif' => 'if',
'when' => 'given',
'default' => 'given',
'case' => 'switch',
'undef' => 'Q',
);
}
sub make_alignment_patterns {
my ( $ibeg, $iend ) = @_;
my @tokens = ();
my @fields = ();
my @patterns = ();
my $i_start = $ibeg;
my $i;
my $depth = 0;
my @container_name = ("");
my @multiple_comma_arrows = (undef);
my $j = 0;
$patterns[0] = "";
for $i ( $ibeg .. $iend ) {
if ( $tokens_to_go[$i] eq '(' ) {
my $i_mate = $mate_index_to_go[$i];
if ( $i_mate > $i && $i_mate <= $iend ) {
$depth++;
my $seqno = $type_sequence_to_go[$i];
my $count = comma_arrow_count($seqno);
$multiple_comma_arrows[$depth] = $count && $count > 1;
my $name = previous_nonblank_token($i);
$name =~ s/^->//;
$container_name[$depth] = "+" . $name;
if ( $matching_token_to_go[$i] eq '' ) {
my $len =
length(
join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
$len += leading_spaces_to_go($i_start)
if ( $i_start == $ibeg );
$container_name[$depth] .= "-" . $len;
}
}
}
elsif ( $tokens_to_go[$i] eq ')' ) {
$depth-- if $depth > 0;
}
if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
my $tok = my $raw_tok = $matching_token_to_go[$i];
if ( $raw_tok ne '#' ) {
$tok .= "$nesting_depth_to_go[$i]";
}
if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
if ( $container_name[$depth] ) {
$tok .= $container_name[$depth];
}
}
if ( $raw_tok eq '(' ) {
my $ci = $ci_levels_to_go[$ibeg];
if ( $container_name[$depth] =~ /^\+(if|unless)/
&& $ci )
{
$tok .= $container_name[$depth];
}
}
if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
my $block_type = $block_type_to_go[$i];
$block_type = $block_type_map{$block_type}
if ( defined( $block_type_map{$block_type} ) );
if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
$tok .= $block_type;
}
push( @fields,
join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
push( @tokens, $tok );
$i_start = $i;
$j++;
$patterns[$j] = "";
}
if ( $types_to_go[$i] ne 'k' ) {
my $type = $types_to_go[$i];
if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
my $next_type = $types_to_go[ $i + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
$type = 'Q';
if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
}
}
if ( $type eq 'n' ) { $type = 'Q' }
if ( $type eq '!' ) { $type = '' }
$patterns[$j] .= $type;
}
else {
my $tok = $tokens_to_go[$i];
$tok = $keyword_map{$tok}
if ( defined( $keyword_map{$tok} ) );
$patterns[$j] .= $tok;
}
}
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
return ( \@tokens, \@fields, \@patterns );
}
}
{
my @unmatched_opening_indexes_in_this_batch;
my @unmatched_closing_indexes_in_this_batch;
my %comma_arrow_count;
sub is_unbalanced_batch {
@unmatched_opening_indexes_in_this_batch +
@unmatched_closing_indexes_in_this_batch;
}
sub comma_arrow_count {
my $seqno = $_[0];
return $comma_arrow_count{$seqno};
}
sub match_opening_and_closing_tokens {
@unmatched_opening_indexes_in_this_batch = ();
@unmatched_closing_indexes_in_this_batch = ();
%comma_arrow_count = ();
my ( $i, $i_mate, $token );
foreach $i ( 0 .. $max_index_to_go ) {
if ( $type_sequence_to_go[$i] ) {
$token = $tokens_to_go[$i];
if ( $token =~ /^[\(\[\{\?]$/ ) {
push @unmatched_opening_indexes_in_this_batch, $i;
}
elsif ( $token =~ /^[\)\]\}\:]$/ ) {
$i_mate = pop @unmatched_opening_indexes_in_this_batch;
if ( defined($i_mate) && $i_mate >= 0 ) {
if ( $type_sequence_to_go[$i_mate] ==
$type_sequence_to_go[$i] )
{
$mate_index_to_go[$i] = $i_mate;
$mate_index_to_go[$i_mate] = $i;
}
else {
push @unmatched_opening_indexes_in_this_batch,
$i_mate;
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
else {
push @unmatched_closing_indexes_in_this_batch, $i;
}
}
}
elsif ( $tokens_to_go[$i] eq '=>' ) {
if (@unmatched_opening_indexes_in_this_batch) {
my $j = $unmatched_opening_indexes_in_this_batch[-1];
my $seqno = $type_sequence_to_go[$j];
$comma_arrow_count{$seqno}++;
}
}
}
}
sub save_opening_indentation {
my ( $ri_first, $ri_last, $rindentation_list ) = @_;
foreach (@unmatched_closing_indexes_in_this_batch) {
my $seqno = $type_sequence_to_go[$_];
delete $saved_opening_indentation{$seqno};
}
foreach (@unmatched_opening_indexes_in_this_batch) {
my $seqno = $type_sequence_to_go[$_];
$saved_opening_indentation{$seqno} = [
lookup_opening_indentation(
$_, $ri_first, $ri_last, $rindentation_list
)
];
}
}
}
sub get_opening_indentation {
my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
my $i_opening = $mate_index_to_go[$i_closing];
my ( $indent, $offset, $is_leading, $exists );
$exists = 1;
if ( $i_opening >= 0 ) {
( $indent, $offset, $is_leading ) =
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
$rindentation_list );
}
else {
my $seqno = $type_sequence_to_go[$i_closing];
if ($seqno) {
if ( $saved_opening_indentation{$seqno} ) {
( $indent, $offset, $is_leading ) =
@{ $saved_opening_indentation{$seqno} };
}
else {
$indent = 0;
$offset = 0;
$is_leading = 0;
$exists = 0;
}
}
else {
$indent = 0;
$offset = 0;
$is_leading = 0;
$exists = 0;
}
}
return ( $indent, $offset, $is_leading, $exists );
}
sub lookup_opening_indentation {
my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
my $nline = $rindentation_list->[0];
$nline = 0 if ( $i_opening < $ri_start->[$nline] );
unless ( $i_opening > $ri_last->[-1] ) {
while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
}
else {
warning(
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
);
report_definite_bug();
$nline = $ }
$rindentation_list->[0] =
$nline; my $ibeg = $ri_start->[$nline];
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
my $is_leading = ( $ibeg == $i_opening );
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}
{
my %is_if_elsif_else_unless_while_until_for_foreach;
BEGIN {
@_ = qw(if elsif else unless while until for foreach case when);
@is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
}
sub set_adjusted_indentation {
my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
$rindentation_list )
= @_;
my ( $terminal_type, $i_terminal ) =
terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
my $is_outdented_line = 0;
my $is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
my $adjust_indentation = 0;
my $default_adjust_indentation = $adjust_indentation;
my (
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
);
if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
if (
$is_semicolon_terminated
|| (
$terminal_type eq '('
&& $types_to_go[$ibeg] eq ')'
&& ( $nesting_depth_to_go[$iend] + 1 ==
$nesting_depth_to_go[$ibeg] )
)
)
{
$adjust_indentation = 1;
}
if (
$terminal_type eq ','
&& $i_terminal == $ibeg + 1
&& $container_environment_to_go[$i_terminal] eq 'LIST'
)
{
$adjust_indentation = 1;
}
if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
&& $i_terminal == $ibeg )
{
my $ci = $ci_levels_to_go[$ibeg];
my $lev = $levels_to_go[$ibeg];
my $next_type = $types_to_go[ $ibeg + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
if ( $i_next_nonblank <= $max_index_to_go
&& $levels_to_go[$i_next_nonblank] < $lev )
{
$adjust_indentation = 1;
}
}
$default_adjust_indentation = $adjust_indentation;
if ( !$block_type_to_go[$ibeg] ) {
my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
if ( $cti == 1 ) {
if ( $i_terminal <= $ibeg + 1
|| $is_semicolon_terminated )
{
$adjust_indentation = 2;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 2 ) {
if ($is_semicolon_terminated) {
$adjust_indentation = 3;
}
else {
$adjust_indentation = 0;
}
}
elsif ( $cti == 3 ) {
$adjust_indentation = 3;
}
}
else {
if (
$rOpts->{'indent-closing-brace'}
&& (
$i_terminal == $ibeg || $is_semicolon_terminated
)
) {
$adjust_indentation = 3;
}
}
}
elsif ($$rpatterns[0] =~ /^qb*;$/
&& $$rfields[0] =~ /^([\)\}\]\>]);$/ )
{
if ( $closing_token_indentation{$1} == 0 ) {
$adjust_indentation = 1;
}
else {
$adjust_indentation = 3;
}
}
elsif ( $types_to_go[$ibeg] eq ':' ) {
(
$opening_indentation, $opening_offset,
$is_leading, $opening_exists
)
= get_opening_indentation( $ibeg, $ri_first, $ri_last,
$rindentation_list );
if ($is_leading) { $adjust_indentation = 2; }
}
my $indentation;
my $lev;
my $level_end = $levels_to_go[$iend];
if ( $adjust_indentation == 0 ) {
$indentation = $leading_spaces_to_go[$ibeg];
$lev = $levels_to_go[$ibeg];
}
elsif ( $adjust_indentation == 1 ) {
$indentation = $reduced_spaces_to_go[$i_terminal];
$lev = $levels_to_go[$i_terminal];
}
elsif ( $adjust_indentation == 2 ) {
$lev = $levels_to_go[$ibeg];
my $space_count =
get_SPACES($opening_indentation) + $opening_offset;
my $last_spaces = get_SPACES($last_indentation_written);
if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
$last_spaces +=
get_RECOVERABLE_SPACES($last_indentation_written);
}
$lev = $levels_to_go[$ibeg];
if ( $space_count < $last_spaces ) {
if ($rOpts_line_up_parentheses) {
my $lev = $levels_to_go[$ibeg];
$indentation =
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
}
else {
$indentation = $space_count;
}
}
else {
$space_count = leading_spaces_to_go($ibeg);
if ( $default_adjust_indentation == 0 ) {
$indentation = $leading_spaces_to_go[$ibeg];
}
elsif ( $default_adjust_indentation == 1 ) {
$indentation = $reduced_spaces_to_go[$i_terminal];
$lev = $levels_to_go[$i_terminal];
}
}
}
else {
if ( $block_type_to_go[$ibeg]
&& $ci_levels_to_go[$i_terminal] == 0 )
{
my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
$indentation = $spaces + $rOpts_indent_columns;
}
else {
$indentation = $last_unadjusted_indentation;
if ( get_SPACES($last_indentation_written) <
get_SPACES($indentation) )
{
$indentation = $last_indentation_written;
}
}
$lev = $levels_to_go[$ibeg];
}
unless ( $ibeg == 0 && $starting_in_quote ) {
$last_indentation_written = $indentation;
$last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
$last_leading_token = $tokens_to_go[$ibeg];
}
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
&& ( $iend == $ibeg
|| $is_if_elsif_else_unless_while_until_for_foreach{
$block_type_to_go[$ibeg] } );
my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
if ( defined($opening_indentation)
&& !$is_isolated_block_brace
&& !$is_unaligned_colon )
{
if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
$indentation = $opening_indentation;
}
}
push @{$rindentation_list}, $indentation;
if (
$ibeg == 0
&& (
(
$rOpts->{'outdent-keywords'}
&& $types_to_go[$ibeg] eq 'k'
&& $outdent_keyword{ $tokens_to_go[$ibeg] }
)
|| ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
|| ( $types_to_go[$ibeg] eq '#'
&& $rOpts->{'outdent-static-block-comments'}
&& $is_static_block_comment )
)
)
{
my $space_count = leading_spaces_to_go($ibeg);
if ( $space_count > 0 ) {
$space_count -= $rOpts_continuation_indentation;
$is_outdented_line = 1;
if ( $space_count < 0 ) { $space_count = 0 }
if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
$space_count = 1;
}
if ($rOpts_line_up_parentheses) {
$indentation =
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
}
else {
$indentation = $space_count;
}
}
}
return ( $indentation, $lev, $level_end, $terminal_type,
$is_semicolon_terminated, $is_outdented_line );
}
}
sub set_vertical_tightness_flags {
my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
if ( $n < $n_last_line ) {
my $ibeg_next = $$ri_first[ $n + 1 ];
my $token_end = $tokens_to_go[$iend];
my $iend_next = $$ri_last[ $n + 1 ];
if (
$type_sequence_to_go[$iend]
&& !$block_type_to_go[$iend]
&& $is_opening_token{$token_end}
&& (
$opening_vertical_tightness{$token_end} > 0
|| ( $rOpts_line_up_parentheses
&& $token_end eq '('
&& $iend > $ibeg
&& $types_to_go[ $iend - 1 ] ne 'b' )
)
)
{
my $ovt = $opening_vertical_tightness{$token_end};
my $iend_next = $$ri_last[ $n + 1 ];
unless (
$ovt < 2
&& ( $nesting_depth_to_go[ $iend_next + 1 ] !=
$nesting_depth_to_go[$ibeg_next] )
)
{
my $valid_flag = $ovt;
@{$rvertical_tightness_flags} =
( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
}
}
my $token_next = $tokens_to_go[$ibeg_next];
if ( $type_sequence_to_go[$ibeg_next]
&& !$block_type_to_go[$ibeg_next]
&& $is_closing_token{$token_next}
&& $types_to_go[$iend] !~ '#' ) {
my $ovt = $opening_vertical_tightness{$token_next};
my $cvt = $closing_vertical_tightness{$token_next};
if (
(
$nesting_depth_to_go[$ibeg_next] ==
$nesting_depth_to_go[ $iend_next + 1 ] + 1
)
&& (
$cvt == 2
|| (
$container_environment_to_go[$ibeg_next] ne 'LIST'
&& (
$cvt == 1
|| ( $rOpts_line_up_parentheses
&& $token_next eq ')' )
)
)
)
)
{
my $ok = 0;
if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
else {
my $str = join( '',
@types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
if ( $str =~ /^b?[ }
if ($ok) {
my $valid_flag = $cvt;
@{$rvertical_tightness_flags} = (
2,
$tightness{$token_next} == 2 ? 0 : 1,
$type_sequence_to_go[$ibeg_next], $valid_flag,
);
}
}
}
if (
$opening_token_right{ $tokens_to_go[$ibeg_next] }
&& !$is_opening_token{$token_end}
&& !$block_type_to_go[$ibeg_next]
&& ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
)
{
my $valid_flag = 1;
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
@{$rvertical_tightness_flags} =
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
}
my $stackable;
my $token_beg_next = $tokens_to_go[$ibeg_next];
if ( $types_to_go[$ibeg_next] eq 'q' ) {
if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
$token_beg_next = $1;
}
}
if ( $is_closing_token{$token_end}
&& $is_closing_token{$token_beg_next} )
{
$stackable = $stack_closing_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] )
; }
elsif ($is_opening_token{$token_end}
&& $is_opening_token{$token_beg_next} )
{
$stackable = $stack_opening_token{$token_beg_next}
unless ( $block_type_to_go[$ibeg_next] )
; }
if ($stackable) {
my $is_semicolon_terminated;
if ( $n + 1 == $n_last_line ) {
my ( $terminal_type, $i_terminal ) = terminal_type(
\@types_to_go, \@block_type_to_go,
$ibeg_next, $iend_next
);
$is_semicolon_terminated = $terminal_type eq ';'
&& $nesting_depth_to_go[$iend_next] <
$nesting_depth_to_go[$ibeg_next];
}
if (
$is_semicolon_terminated
|| ( $iend_next == $ibeg_next
|| $iend_next == $ibeg_next + 2
&& $types_to_go[$iend_next] eq '#' )
)
{
my $valid_flag = 1;
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
@{$rvertical_tightness_flags} =
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
);
}
}
}
elsif ($rOpts_block_brace_vertical_tightness
&& $ibeg eq $iend
&& $types_to_go[$iend] eq '{'
&& $block_type_to_go[$iend] =~
/$block_brace_vertical_tightness_pattern/o )
{
@{$rvertical_tightness_flags} =
( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
}
$rvertical_tightness_flags->[4] = get_seqno($ibeg);
$rvertical_tightness_flags->[5] = get_seqno($iend);
return $rvertical_tightness_flags;
}
sub get_seqno {
my ($ii) = @_;
my $seqno = $type_sequence_to_go[$ii];
if ( $types_to_go[$ii] eq 'q' ) {
my $SEQ_QW = -1;
if ( $ii > 0 ) {
$seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
}
else {
if ( !$ending_in_quote ) {
$seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
}
}
}
return ($seqno);
}
{
my %is_vertical_alignment_type;
my %is_vertical_alignment_keyword;
BEGIN {
@_ = qw = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
{ ? : => =~ && || // ~~ !~~
@is_vertical_alignment_type{@_} = (1) x scalar(@_);
@_ = qw(if unless and or err eq ne for foreach while until);
@is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
}
sub set_vertical_alignment_markers {
if ( !$rOpts_add_whitespace ) {
for my $i ( 0 .. $max_index_to_go ) {
$matching_token_to_go[$i] = '';
}
return;
}
my ( $ri_first, $ri_last ) = @_;
my $i_terminal = $max_index_to_go;
if ( $types_to_go[$i_terminal] eq '#' ) {
if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
if ( $i_terminal > 0 ) { --$i_terminal }
}
}
my $last_vertical_alignment_before_index;
my $vert_last_nonblank_type;
my $vert_last_nonblank_token;
my $vert_last_nonblank_block_type;
my $max_line = @$ri_first - 1;
my ( $i, $type, $token, $block_type, $alignment_type );
my ( $ibeg, $iend, $line );
foreach $line ( 0 .. $max_line ) {
$ibeg = $$ri_first[$line];
$iend = $$ri_last[$line];
$last_vertical_alignment_before_index = -1;
$vert_last_nonblank_type = '';
$vert_last_nonblank_token = '';
$vert_last_nonblank_block_type = '';
foreach $i ( $ibeg .. $iend ) {
$alignment_type = '';
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
if ( $matching_token_to_go[$i] ) {
$matching_token_to_go[$i] = '';
next;
}
if ( $i < $ibeg + 2 ) { }
elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
elsif ( $type eq '#' ) {
unless (
(
$rOpts->{'static-side-comments'}
&& $token =~ /$static_side_comment_pattern/o
)
|| ( $vert_last_nonblank_block_type
&& $token =~
/$closing_side_comment_prefix_pattern/o )
)
{
$alignment_type = $type;
} }
elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
elsif ( $type eq 'k' ) {
if ( $is_vertical_alignment_keyword{$token} ) {
$alignment_type = $token;
}
}
elsif ( $is_vertical_alignment_type{$type} ) {
$alignment_type = $token;
if ( $i == $iend || $i >= $i_terminal ) {
$alignment_type = "";
}
if ( $i == $ibeg + 2
&& $types_to_go[$ibeg] =~ /^[\.\:]$/
&& $types_to_go[ $i - 1 ] eq 'b' )
{
$alignment_type = "";
}
if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
$alignment_type = ""
unless $vert_last_nonblank_token =~
/^(if|unless|elsif)$/;
}
}
if ($alignment_type) {
$last_vertical_alignment_before_index = $i;
}
if (
!$alignment_type
&& ( $i > $ibeg )
&& $types_to_go[ $i - 1 ] eq 'b'
&& ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
&& ( $type !~ /^[b\
)
{
$alignment_type = $vert_last_nonblank_type;
}
$matching_token_to_go[$i] = $alignment_type;
if ( $type ne 'b' ) {
$vert_last_nonblank_type = $type;
$vert_last_nonblank_token = $token;
$vert_last_nonblank_block_type = $block_type;
}
}
}
}
}
sub terminal_type {
my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
if ( $$rtype[$ibeg] eq '#' ) {
return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
}
else {
for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
next if ( $$rtype[$i] eq 'b' );
next if ( $$rtype[$i] eq '#' );
my $terminal_type = $$rtype[$i];
if (
$terminal_type eq '}'
&& ( !$$rblock_type[$i]
|| ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
)
{
$terminal_type = 'b';
}
return wantarray ? ( $terminal_type, $i ) : $terminal_type;
}
return wantarray ? ( ' ', $ibeg ) : ' ';
}
}
{
my %is_good_keyword_breakpoint;
my %is_lt_gt_le_ge;
sub set_bond_strengths {
BEGIN {
@_ = qw(if unless while until for foreach);
@is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
@_ = qw(lt gt le ge);
@is_lt_gt_le_ge{@_} = (1) x scalar(@_);
$left_bond_strength{'Z'} = NO_BREAK;
$right_bond_strength{'Z'} = NO_BREAK;
$left_bond_strength{'w'} = NO_BREAK;
$right_bond_strength{'b'} = NO_BREAK;
@_ = qw" ** .. ... <=> ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (STRONG) x scalar(@_);
$left_bond_strength{'=>'} = NO_BREAK;
$right_bond_strength{'=>'} = NOMINAL;
$left_bond_strength{'J'} = NO_BREAK;
$right_bond_strength{'J'} = NOMINAL;
$left_bond_strength{'j'} = STRONG;
$right_bond_strength{'j'} = STRONG;
$left_bond_strength{'A'} = STRONG;
$right_bond_strength{'A'} = STRONG;
$left_bond_strength{'->'} = STRONG;
$right_bond_strength{'->'} = VERY_STRONG;
@_ = qw" % ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
@_ = qw" * / x ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
@_ = qw" + - ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
@_ = qw" >> << ";
@right_bond_strength{@_} = (STRONG) x scalar(@_);
@left_bond_strength{@_} = (NOMINAL) x scalar(@_);
$right_bond_strength{'.'} = STRONG;
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
@_ = qw"} ] ) ";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
@_ = qw"!= == =~ !~ ~~ !~~";
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
@_ = qw" < > | & >= <=";
@left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
$left_bond_strength{'Q'} = NOMINAL;
$right_bond_strength{'Q'} = NOMINAL + 0.02;
$left_bond_strength{'q'} = NOMINAL;
$right_bond_strength{'q'} = NOMINAL;
$left_bond_strength{'k'} = NOMINAL;
$right_bond_strength{'k'} = STRONG;
$left_bond_strength{'G'} = NOMINAL;
$right_bond_strength{'G'} = STRONG;
@_ = qw(
= **= += *= &= <<= &&=
-= /= |= >>= ||= //=
.= %= ^=
x=
);
@left_bond_strength{@_} = (STRONG) x scalar(@_);
@right_bond_strength{@_} =
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
$right_bond_strength{'||'} = NOMINAL;
$left_bond_strength{'||'} = $right_bond_strength{'='};
$right_bond_strength{'//'} = NOMINAL;
$left_bond_strength{'//'} = $right_bond_strength{'='};
$right_bond_strength{'&&'} = NOMINAL;
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
$left_bond_strength{';'} = VERY_STRONG;
$right_bond_strength{';'} = VERY_WEAK;
$left_bond_strength{'f'} = VERY_STRONG;
$right_bond_strength{'f'} = VERY_WEAK - 0.03;
$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
$right_bond_strength{':'} = NO_BREAK;
$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
$right_bond_strength{'?'} = NO_BREAK;
$left_bond_strength{','} = VERY_STRONG;
$right_bond_strength{','} = VERY_WEAK;
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
$left_bond_strength{'err'} = VERY_WEAK - 0.02;
$left_bond_strength{'xor'} = NOMINAL;
$right_bond_strength{'and'} = NOMINAL;
$right_bond_strength{'or'} = NOMINAL;
$right_bond_strength{'err'} = NOMINAL;
$right_bond_strength{'xor'} = STRONG;
}
$nobreak_to_go[$max_index_to_go] = 0;
my $colon_bias = 0;
my $amp_bias = 0;
my $bar_bias = 0;
my $and_bias = 0;
my $or_bias = 0;
my $dot_bias = 0;
my $f_bias = 0;
my $code_bias = -.01;
my $type = 'b';
my $token = ' ';
my $last_type;
my $last_nonblank_type = $type;
my $last_nonblank_token = $token;
my $delta_bias = 0.0001;
my $list_str = $left_bond_strength{'?'};
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
$next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
);
for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
$last_type = $type;
if ( $type ne 'b' ) {
$last_nonblank_type = $type;
$last_nonblank_token = $token;
}
$type = $types_to_go[$i];
if ( $type eq 'b' && $last_type ne 'b' ) {
$bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
next;
}
$token = $tokens_to_go[$i];
$block_type = $block_type_to_go[$i];
$i_next = $i + 1;
$next_type = $types_to_go[$i_next];
$next_token = $tokens_to_go[$i_next];
$total_nesting_depth = $nesting_depth_to_go[$i_next];
$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $bond_str = VERY_STRONG;
my $bsr = $right_bond_strength{$type};
if ( !defined($bsr) ) {
if ( $is_digraph{$type} || $is_trigraph{$type} ) {
$bsr = STRONG;
}
else {
$bsr = VERY_STRONG;
}
}
if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
$bsr = $right_bond_strength{$token};
}
elsif ( $token eq 'ne' or $token eq 'eq' ) {
$bsr = NOMINAL;
}
my $bsl = $left_bond_strength{$next_nonblank_type};
if ( $i_next_nonblank > $max_index_to_go ) {
$bsl = NOMINAL;
}
if ( !defined($bsl) ) {
if ( $is_digraph{$next_nonblank_type}
|| $is_trigraph{$next_nonblank_type} )
{
$bsl = WEAK;
}
else {
$bsl = VERY_STRONG;
}
}
if ( $next_nonblank_type eq 'k'
&& defined( $left_bond_strength{$next_nonblank_token} ) )
{
$bsl = $left_bond_strength{$next_nonblank_token};
}
elsif ($next_nonblank_token eq 'ne'
or $next_nonblank_token eq 'eq' )
{
$bsl = NOMINAL;
}
elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
$bsl = 0.9 * NOMINAL + 0.1 * STRONG;
}
$bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
my $bond_str_1 = $bond_str;
if ( $type eq ')' ) {
if ( $next_nonblank_type eq '{' ) {
$bond_str = VERY_WEAK + 0.03;
}
}
elsif ( $type eq '(' ) {
if ( $next_nonblank_type eq '{' ) {
$bond_str = NOMINAL;
}
}
elsif ( $type eq 'R' or $type eq '}' ) {
if ( $next_nonblank_type eq '(' ) {
$bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
}
}
elsif ( $type eq ',' ) {
$bond_str += $bond_strength_to_go[$i];
}
elsif ( $type eq 'f' ) {
$bond_str += $f_bias;
$f_bias += $delta_bias;
}
elsif ( $type eq ':' ) {
if ( !$want_break_before{$type} ) {
$bond_str += $colon_bias;
$colon_bias += $delta_bias;
}
}
if ( $next_nonblank_type eq ':'
&& $want_break_before{$next_nonblank_type} )
{
$bond_str += $colon_bias;
$colon_bias += $delta_bias;
}
elsif ( $next_nonblank_type eq '.' ) {
if ( $want_break_before{'.'} ) {
unless (
$last_nonblank_type eq '.'
&& (
length($token) <=
$rOpts_short_concatenation_item_length )
&& ( $token !~ /^[\)\]\}]$/ )
)
{
$dot_bias += $delta_bias;
}
$bond_str += $dot_bias;
}
}
elsif ($next_nonblank_type eq '&&'
&& $want_break_before{$next_nonblank_type} )
{
$bond_str += $amp_bias;
$amp_bias += $delta_bias;
}
elsif ($next_nonblank_type eq '||'
&& $want_break_before{$next_nonblank_type} )
{
$bond_str += $bar_bias;
$bar_bias += $delta_bias;
}
elsif ( $next_nonblank_type eq 'k' ) {
if ( $next_nonblank_token eq 'and'
&& $want_break_before{$next_nonblank_token} )
{
$bond_str += $and_bias;
$and_bias += $delta_bias;
}
elsif ($next_nonblank_token =~ /^(or|err)$/
&& $want_break_before{$next_nonblank_token} )
{
$bond_str += $or_bias;
$or_bias += $delta_bias;
}
elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
$bond_str = $list_str if ( $bond_str > $list_str );
}
elsif ( $token eq 'err'
&& !$want_break_before{$token} )
{
$bond_str += $or_bias;
$or_bias += $delta_bias;
}
}
if ( $type eq ':'
&& !$want_break_before{$type} )
{
$bond_str += $colon_bias;
$colon_bias += $delta_bias;
}
elsif ( $type eq '&&'
&& !$want_break_before{$type} )
{
$bond_str += $amp_bias;
$amp_bias += $delta_bias;
}
elsif ( $type eq '||'
&& !$want_break_before{$type} )
{
$bond_str += $bar_bias;
$bar_bias += $delta_bias;
}
elsif ( $type eq 'k' ) {
if ( $token eq 'and'
&& !$want_break_before{$token} )
{
$bond_str += $and_bias;
$and_bias += $delta_bias;
}
elsif ( $token eq 'or'
&& !$want_break_before{$token} )
{
$bond_str += $or_bias;
$or_bias += $delta_bias;
}
}
if ( ( $type eq ']' or $type eq 'R' )
&& ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
)
{
$bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
}
if ( $next_nonblank_token =~ /^->/ ) {
if ( $type eq 'i' ) {
$bond_str = 1.45 * STRONG;
}
elsif ( $type =~ /^[\)\]\}R]$/ ) {
$bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
}
else {
if ( $bond_str <= NOMINAL ) {
$bond_str = NOMINAL + 0.01;
}
}
}
if ( $token eq ')' && $next_nonblank_token eq '[' ) {
$bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
}
if ( $token eq '('
&& $next_nonblank_type eq 'i'
&& $last_nonblank_type eq 'k'
&& $is_sort_map_grep{$last_nonblank_token} )
{
$bond_str = NO_BREAK;
}
if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
$bond_str = NO_BREAK;
}
if ( $type eq '}' && $block_type ) {
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
$code_bias += $delta_bias;
}
if ( $type eq 'k' ) {
if ( $next_nonblank_type eq 'k'
&& $is_last_next_redo_return{$token} )
{
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
}
if ( $token eq 'my' ) {
$bond_str = NO_BREAK;
}
}
if ( $is_if_brace_follower{$next_nonblank_token} ) {
$bond_str = VERY_WEAK;
}
if ( $next_nonblank_type eq 'k' ) {
if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
$bond_str = VERY_WEAK / 1.05;
}
}
elsif ( $next_nonblank_type eq '=>' ) {
if ( $bond_str < STRONG ) { $bond_str = STRONG }
}
if ( $type eq 'C' or $type eq 'U' ) {
if ( $next_nonblank_type eq '=>' ) {
$bond_str = NO_BREAK;
}
if ( $next_nonblank_token eq '(' ) {
$bond_str = NO_BREAK;
}
}
elsif ( $type eq 'L' ) {
if ( $next_nonblank_type eq 'w' ) {
$bond_str = NO_BREAK;
}
}
elsif ( $type eq '{' ) {
if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
my $i_next_next_nonblank = $i_next_nonblank + 1;
my $next_next_type = $types_to_go[$i_next_next_nonblank];
if ( $next_next_type eq 'b'
&& $i_next_nonblank < $max_index_to_go )
{
$i_next_next_nonblank++;
$next_next_type = $types_to_go[$i_next_next_nonblank];
}
if ( !$old_breakpoint_to_go[$i]
&& ( $next_next_type eq ',' || $next_next_type eq '}' )
)
{
$bond_str = NO_BREAK;
}
}
}
elsif ( $type eq 'w' ) {
if ( $next_nonblank_type eq 'R' ) {
$bond_str = NO_BREAK;
}
if ( $next_nonblank_type eq '=>' ) {
$bond_str = NO_BREAK;
}
}
elsif ( $type eq 'F' ) {
$bond_str = NO_BREAK;
}
elsif ( $type eq 't' or $type eq 'i' ) {
if ( $next_nonblank_type eq 'L' ) {
$bond_str = NO_BREAK;
}
}
elsif ( $type eq 'Z' ) {
if (
(
$next_type ne 'b'
&& defined( $want_left_space{$next_type} )
&& $want_left_space{$next_type} == WS_NO
)
|| $next_nonblank_type =~ /^[\/\?]$/
)
{
$bond_str = NO_BREAK;
}
}
if ( $next_nonblank_type eq 'Z' ) {
$bond_str = NO_BREAK;
}
if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
$bond_str = NO_BREAK;
}
elsif ( $next_nonblank_type eq '++' ) {
$bond_str = NO_BREAK;
}
elsif ( $next_nonblank_type eq '?' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
}
elsif ( $next_nonblank_type eq '.' ) {
$bond_str = NO_BREAK
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
}
if ($rOpts_cuddled_else) {
if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
|| ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
{
$bond_str = NO_BREAK;
}
}
if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
$bond_str = NO_BREAK;
}
if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
$bond_str = NO_BREAK;
}
my $strength;
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
if ( $total_nesting_depth > 0 ) {
$strength = $bond_str + $total_nesting_depth;
}
else {
$strength = $bond_str;
}
}
else {
$strength = NO_BREAK;
}
if ( $type eq '#' ) { $strength = 0 }
$bond_strength_to_go[$i] = $strength;
FORMATTER_DEBUG_FLAG_BOND && do {
my $str = substr( $token, 0, 15 );
$str .= ' ' x ( 16 - length($str) );
print
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
};
}
}
}
sub pad_array_to_go {
$tokens_to_go[ $max_index_to_go + 1 ] = '';
$tokens_to_go[ $max_index_to_go + 2 ] = '';
$types_to_go[ $max_index_to_go + 1 ] = 'b';
$types_to_go[ $max_index_to_go + 2 ] = 'b';
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
$nesting_depth_to_go[$max_index_to_go];
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
unless ( get_saw_brace_error() ) {
warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
);
report_definite_bug();
}
}
else {
$nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
}
}
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
}
}
{
my (
$block_type, $current_depth,
$depth, $i,
$i_last_nonblank_token, $last_colon_sequence_number,
$last_nonblank_token, $last_nonblank_type,
$last_old_breakpoint_count, $minimum_depth,
$next_nonblank_block_type, $next_nonblank_token,
$next_nonblank_type, $old_breakpoint_count,
$starting_breakpoint_count, $starting_depth,
$token, $type,
$type_sequence,
);
my (
@breakpoint_stack, @breakpoint_undo_stack,
@comma_index, @container_type,
@identifier_count_stack, @index_before_arrow,
@interrupted_list, @item_count_stack,
@last_comma_index, @last_dot_index,
@last_nonblank_type, @old_breakpoint_count_stack,
@opening_structure_index_stack, @rfor_semicolon_list,
@has_old_logical_breakpoints, @rand_or_list,
@i_equals,
);
sub check_for_new_minimum_depth {
my $depth = shift;
if ( $depth < $minimum_depth ) {
$minimum_depth = $depth;
$breakpoint_stack[$depth] = $starting_breakpoint_count;
$container_type[$depth] = "";
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 1;
$item_count_stack[$depth] = 0;
$last_nonblank_type[$depth] = "";
$opening_structure_index_stack[$depth] = -1;
$breakpoint_undo_stack[$depth] = undef;
$comma_index[$depth] = undef;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$old_breakpoint_count_stack[$depth] = undef;
$has_old_logical_breakpoints[$depth] = 0;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
if ( !defined( $has_broken_sublist[$depth] ) ) {
$dont_align[$depth] = 0;
$has_broken_sublist[$depth] = 0;
$want_comma_break[$depth] = 0;
}
}
}
sub set_comma_breakpoints {
my $dd = shift;
my $bp_count = 0;
my $do_not_break_apart = 0;
if ( $item_count_stack[$dd] ) {
if ( $dont_align[$dd] ) {
do_uncontained_comma_breaks($dd);
}
else {
my $fbc = $forced_breakpoint_count;
my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
set_comma_breakpoints_do(
$dd,
$opening_structure_index_stack[$dd],
$i,
$item_count_stack[$dd],
$identifier_count_stack[$dd],
$comma_index[$dd],
$next_nonblank_type,
$container_type[$dd],
$interrupted_list[$dd],
\$do_not_break_apart,
$must_break_open,
);
$bp_count = $forced_breakpoint_count - $fbc;
$do_not_break_apart = 0 if $must_break_open;
}
}
return ( $bp_count, $do_not_break_apart );
}
sub do_uncontained_comma_breaks {
my $dd = shift;
my $bias = -.01;
foreach my $ii ( @{ $comma_index[$dd] } ) {
if ( $old_breakpoint_to_go[$ii] ) {
$bond_strength_to_go[$ii] = $bias;
$bias *= 0.99;
}
}
my $i_first_comma = $comma_index[$dd]->[0];
if ( $old_breakpoint_to_go[$i_first_comma] ) {
my $level_comma = $levels_to_go[$i_first_comma];
my $ibreak = -1;
my $obp_count = 0;
for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
if ( $old_breakpoint_to_go[$ii] ) {
$obp_count++;
last if ( $obp_count > 1 );
$ibreak = $ii
if ( $levels_to_go[$ii] == $level_comma );
}
}
if ( $ibreak >= 0 && $obp_count == 1 ) {
set_forced_breakpoint($ibreak);
}
}
}
my %is_logical_container;
BEGIN {
@_ = qw @is_logical_container{@_} = (1) x scalar(@_);
}
sub set_for_semicolon_breakpoints {
my $dd = shift;
foreach ( @{ $rfor_semicolon_list[$dd] } ) {
set_forced_breakpoint($_);
}
}
sub set_logical_breakpoints {
my $dd = shift;
if (
$item_count_stack[$dd] == 0
&& $is_logical_container{ $container_type[$dd] }
|| $has_old_logical_breakpoints[$dd]
)
{
foreach my $i ( 0 .. 3 ) {
if ( $rand_or_list[$dd][$i] ) {
foreach ( @{ $rand_or_list[$dd][$i] } ) {
set_forced_breakpoint($_);
}
foreach ( @{ $rand_or_list[$dd][4] } ) {
set_forced_breakpoint($_);
}
$rand_or_list[$dd] = [];
last;
}
}
}
}
sub is_unbreakable_container {
my $dd = shift;
$is_sort_map_grep{ $container_type[$dd] };
}
sub scan_list {
$starting_depth = $nesting_depth_to_go[0];
$block_type = ' ';
$current_depth = $starting_depth;
$i = -1;
$last_colon_sequence_number = -1;
$last_nonblank_token = ';';
$last_nonblank_type = ';';
$last_nonblank_block_type = ' ';
$last_old_breakpoint_count = 0;
$minimum_depth = $current_depth + 1; $old_breakpoint_count = 0;
$starting_breakpoint_count = $forced_breakpoint_count;
$token = ';';
$type = ';';
$type_sequence = '';
check_for_new_minimum_depth($current_depth);
my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
my $want_previous_breakpoint = -1;
my $saw_good_breakpoint;
my $i_line_end = -1;
my $i_line_start = -1;
while ( ++$i <= $max_index_to_go ) {
if ( $type ne 'b' ) {
$i_last_nonblank_token = $i - 1;
$last_nonblank_type = $type;
$last_nonblank_token = $token;
$last_nonblank_block_type = $block_type;
}
$type = $types_to_go[$i];
$block_type = $block_type_to_go[$i];
$token = $tokens_to_go[$i];
$type_sequence = $type_sequence_to_go[$i];
my $next_type = $types_to_go[ $i + 1 ];
my $next_token = $tokens_to_go[ $i + 1 ];
my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
$next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
if ( $want_previous_breakpoint >= 0 ) {
set_forced_breakpoint($want_previous_breakpoint);
$want_previous_breakpoint = -1;
}
$last_old_breakpoint_count = $old_breakpoint_count;
if ( $old_breakpoint_to_go[$i] ) {
$i_line_end = $i;
$i_line_start = $i_next_nonblank;
$old_breakpoint_count++;
if ($rOpts_break_at_old_keyword_breakpoints) {
if (
$next_nonblank_type eq 'k'
&& $is_keyword_returning_list{$next_nonblank_token}
&& ( $type =~ /^[=\)\]\}Riw]$/
|| $type eq 'k'
&& $is_keyword_returning_list{$token} )
)
{
$want_previous_breakpoint = $i;
}
}
}
next if ( $type eq 'b' );
$depth = $nesting_depth_to_go[ $i + 1 ];
if ( $type eq '#' ) {
if ( $i != $max_index_to_go ) {
warning(
"Non-fatal program bug: backup logic needed to break after a comment\n"
);
report_definite_bug();
$nobreak_to_go[$i] = 0;
set_forced_breakpoint($i);
}
}
if (
$type eq 'k'
&& $i > 0
&& $token =~ /^(if|unless|while|until|for)$/
&& ( $last_nonblank_type ne 'n' || $i > 2 )
&& $last_nonblank_block_type ne 'do'
&& (
$is_long_line
|| ( $next_nonblank_token eq '('
&& $mate_index_to_go[$i_next_nonblank] < $i )
)
)
{
set_forced_breakpoint( $i - 1 );
}
if ( $type eq '||' ) {
push @{ $rand_or_list[$depth][2] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq '&&' ) {
push @{ $rand_or_list[$depth][3] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $type eq 'f' ) {
push @{ $rfor_semicolon_list[$depth] }, $i;
}
elsif ( $type eq 'k' ) {
if ( $token eq 'and' ) {
push @{ $rand_or_list[$depth][1] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
}
elsif ( $token eq 'or' ) {
push @{ $rand_or_list[$depth][0] }, $i;
++$has_old_logical_breakpoints[$depth]
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints );
if ( $is_logical_container{ $container_type[$depth] } ) {
}
else {
if ($is_long_line) { set_forced_breakpoint($i) }
elsif ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
$saw_good_breakpoint = 1;
}
}
}
elsif ( $token eq 'if' || $token eq 'unless' ) {
push @{ $rand_or_list[$depth][4] }, $i;
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_logical_breakpoints )
{
set_forced_breakpoint($i);
}
}
}
elsif ( $is_assignment{$type} ) {
$i_equals[$depth] = $i;
}
if ($type_sequence) {
if ( $token =~ /^[\)\]\}\:]$/ ) {
if ( $type eq ':' ) {
$last_colon_sequence_number = $type_sequence;
if ( ( $i == $i_line_start || $i == $i_line_end )
&& $rOpts_break_at_old_ternary_breakpoints )
{
set_forced_breakpoint($i);
if ( $i_equals[$depth] > 0 ) {
set_forced_breakpoint( $i_equals[$depth] );
$i_equals[$depth] = -1;
}
}
}
if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
my $inc = ( $type eq ':' ) ? 0 : 1;
set_forced_breakpoint( $i - $inc );
delete $postponed_breakpoint{$type_sequence};
}
}
elsif ( $token eq '?' ) {
my $i_colon = $mate_index_to_go[$i];
if (
$i_colon <= 0 || $i == 0 || $i ==
$max_index_to_go )
{
set_forced_breakpoint($i)
unless (
$type_sequence == (
$last_colon_sequence_number +
TYPE_SEQUENCE_INCREMENT
)
|| $tokens_to_go[$max_index_to_go] eq '#'
);
set_closing_breakpoint($i);
}
}
}
if ( $depth > $current_depth ) {
$breakpoint_stack[$depth] = $forced_breakpoint_count;
$breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
$has_broken_sublist[$depth] = 0;
$identifier_count_stack[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 0;
$item_count_stack[$depth] = 0;
$last_comma_index[$depth] = undef;
$last_dot_index[$depth] = undef;
$last_nonblank_type[$depth] = $last_nonblank_type;
$old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
$opening_structure_index_stack[$depth] = $i;
$rand_or_list[$depth] = [];
$rfor_semicolon_list[$depth] = [];
$i_equals[$depth] = -1;
$want_comma_break[$depth] = 0;
$container_type[$depth] =
( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
? $last_nonblank_token
: "";
$has_old_logical_breakpoints[$depth] = 0;
if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
{
set_closing_breakpoint($i);
}
$dont_align[$depth] =
( $block_type ne "" )
|| ( $type eq '(' ) && (
( $last_nonblank_type eq 'k' )
|| ( $next_nonblank_type eq '(' )
);
if (
$block_type
&& ( $last_nonblank_token eq ')' )
&& $mate_index_to_go[$i_last_nonblank_token] < 0
&& !$rOpts->{'opening-brace-always-on-right'}
&& ( $type eq '{' ) && ( $token eq '{' ) )
{
set_forced_breakpoint( $i - 1 );
}
}
elsif ( $depth < $current_depth ) {
check_for_new_minimum_depth($depth);
$has_old_logical_breakpoints[$depth] ||=
$has_old_logical_breakpoints[$current_depth];
if ( $token eq ')'
&& $next_nonblank_block_type
&& $interrupted_list[$current_depth]
&& $next_nonblank_type eq '{'
&& !$rOpts->{'opening-brace-always-on-right'} )
{
set_forced_breakpoint($i);
}
my ( $bp_count, $do_not_break_apart ) =
set_comma_breakpoints($current_depth);
my $i_opening = $opening_structure_index_stack[$current_depth];
my $saw_opening_structure = ( $i_opening >= 0 );
my $is_long_term = $bp_count > 0;
if ( !$is_long_term && $saw_opening_structure ) {
my $i_opening_minus = find_token_starting_list($i_opening);
$is_long_term =
excess_line_length( $i_opening_minus, $i ) >= 0;
}
if (
( $rOpts_comma_arrow_breakpoints != 0 )
&& $saw_opening_structure
&& (
$old_breakpoint_count_stack[$current_depth] ==
$last_old_breakpoint_count
|| $rOpts_comma_arrow_breakpoints == 2
)
&& ( $breakpoint_undo_stack[$current_depth] <
$forced_breakpoint_undo_count )
&& !$is_long_term
)
{
undo_forced_breakpoint_stack(
$breakpoint_undo_stack[$current_depth] );
}
my $has_comma_breakpoints =
( $breakpoint_stack[$current_depth] !=
$forced_breakpoint_count );
$has_broken_sublist[$depth] =
$has_broken_sublist[$depth]
|| $has_broken_sublist[$current_depth]
|| $is_long_term
|| $has_comma_breakpoints;
my $is_simple_logical_expression = 0;
if ( $item_count_stack[$current_depth] == 0
&& $saw_opening_structure
&& $tokens_to_go[$i_opening] eq '('
&& $is_logical_container{ $container_type[$current_depth] }
)
{
if ( !$has_comma_breakpoints ) {
$is_simple_logical_expression = 1;
}
else {
set_logical_breakpoints($current_depth);
}
}
if ( $is_long_term
&& @{ $rfor_semicolon_list[$current_depth] } )
{
set_for_semicolon_breakpoints($current_depth);
$has_comma_breakpoints = 1
unless $rOpts_line_up_parentheses;
}
if (
!$block_type
&& !$is_simple_logical_expression
&& (
$has_comma_breakpoints
|| !$saw_opening_structure
|| ( $is_long_term
&& $container_environment_to_go[$i_opening] ne
'BLOCK' )
)
)
{
if ( $rOpts_line_up_parentheses && $saw_opening_structure )
{
my $item = $leading_spaces_to_go[ $i_opening + 1 ];
if ( $i_opening + 1 < $max_index_to_go
&& $types_to_go[ $i_opening + 1 ] eq 'b' )
{
$item = $leading_spaces_to_go[ $i_opening + 2 ];
}
if ( defined($item) ) {
my $i_start_2 = $item->get_STARTING_INDEX();
if (
defined($i_start_2)
&& $i_start_2 ne $i_opening
)
{
my $test1 = $nesting_depth_to_go[$i_opening];
my $test2 = $nesting_depth_to_go[$i_start_2];
if ( $test2 == $test1 ) {
set_forced_breakpoint( $i_start_2 - 1 );
}
}
}
}
if ( $minimum_depth <= $current_depth ) {
set_forced_breakpoint($i_opening)
unless ( $do_not_break_apart
|| is_unbreakable_container($current_depth) );
if ( $last_dot_index[$depth] ) {
set_forced_breakpoint( $last_dot_index[$depth] );
}
if ( $i_opening > 2 ) {
my $i_prev =
( $types_to_go[ $i_opening - 1 ] eq 'b' )
? $i_opening - 2
: $i_opening - 1;
if ( $types_to_go[$i_prev] eq ','
&& $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
{
set_forced_breakpoint($i_prev);
}
elsif (
$types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
{
my $token_prev = $tokens_to_go[$i_prev];
if ( $want_break_before{$token_prev} ) {
set_forced_breakpoint($i_prev);
}
}
}
}
if ( $next_type eq ',' ) {
set_forced_breakpoint( $i + 1 );
}
if (
$is_assignment{$next_nonblank_type}
&& ( $breakpoint_stack[$current_depth] !=
$forced_breakpoint_count )
)
{
set_forced_breakpoint($i);
}
my $icomma = $last_comma_index[$depth];
if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
unless ( $forced_breakpoint_to_go[$icomma] ) {
set_forced_breakpoint($icomma);
}
}
}
elsif ($is_simple_logical_expression
&& $has_old_logical_breakpoints[$current_depth] )
{
set_logical_breakpoints($current_depth);
}
elsif ($is_long_term) {
set_fake_breakpoint();
}
}
$current_depth = $depth;
if ( $type eq '=>' ) {
next if ( $last_nonblank_type eq '=>' );
next if $rOpts_break_at_old_comma_breakpoints;
next if $rOpts_comma_arrow_breakpoints == 3;
$want_comma_break[$depth] = 1;
$index_before_arrow[$depth] = $i_last_nonblank_token;
next;
}
elsif ( $type eq '.' ) {
$last_dot_index[$depth] = $i;
}
elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
&& $container_environment_to_go[$i] ne 'LIST' )
{
$dont_align[$depth] = 1;
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
}
next unless ( $type eq ',' );
$last_dot_index[$depth] = undef;
$last_comma_index[$depth] = $i;
if ( $want_comma_break[$depth] ) {
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
next;
}
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
my $ibreak = $index_before_arrow[$depth] - 1;
if ( $ibreak > 0
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
{
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
set_forced_breakpoint($ibreak);
}
}
}
$want_comma_break[$depth] = 0;
$index_before_arrow[$depth] = -1;
$interrupted_list[$depth] = 1;
next;
}
if ( $depth < $starting_depth && !$dont_align[$depth] ) {
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
next;
}
my $item_count = $item_count_stack[$depth];
if ( $item_count == 0 ) {
if ( ( $opening_structure_index_stack[$depth] < 0 )
&& $container_environment_to_go[$i] eq 'BLOCK' )
{
$dont_align[$depth] = 1;
}
}
$comma_index[$depth][$item_count] = $i;
++$item_count_stack[$depth];
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
$identifier_count_stack[$depth]++;
}
}
for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
$interrupted_list[$dd] = 1;
$has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
set_comma_breakpoints($dd);
set_logical_breakpoints($dd)
if ( $has_old_logical_breakpoints[$dd] );
set_for_semicolon_breakpoints($dd);
my $i_opening = $opening_structure_index_stack[$dd];
set_forced_breakpoint($i_opening)
unless (
is_unbreakable_container($dd)
|| ( $type eq 'Q'
&& $i_opening >= $max_index_to_go - 2
&& $token =~ /^['"]$/ )
);
}
# Return a flag indicating if the input file had some good breakpoints.
# This flag will be used to force a break in a line shorter than the
# allowed line length.
if ( $has_old_logical_breakpoints[$current_depth] ) {
$saw_good_breakpoint = 1;
}
return $saw_good_breakpoint;
}
} # end scan_list
sub find_token_starting_list {
# When testing to see if a block will fit on one line, some
# previous token(s) may also need to be on the line; particularly
# if this is a sub call. So we will look back at least one
# token. NOTE: This isn't perfect, but not critical, because
my $i_opening_paren = shift;
my $i_opening_minus = $i_opening_paren;
my $im1 = $i_opening_paren - 1;
my $im2 = $i_opening_paren - 2;
my $im3 = $i_opening_paren - 3;
my $typem1 = $types_to_go[$im1];
my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';
if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
$i_opening_minus = $i_opening_paren;
}
elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
$i_opening_minus = $im1 if $im1 >= 0;
for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
$i_opening_minus = $j;
}
if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
}
elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
$i_opening_minus = $im2;
}
return $i_opening_minus;
}
{
my %is_keyword_with_special_leading_term;
BEGIN {
@_ =
qw(formline grep kill map printf sprintf push chmod join pack unshift);
@is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
}
sub set_comma_breakpoints_do {
my (
$depth, $i_opening_paren, $i_closing_paren,
$item_count, $identifier_count, $rcomma_index,
$next_nonblank_type, $list_type, $interrupted,
$rdo_not_break_apart, $must_break_open,
) = @_;
return if ( $item_count < 1 );
my $i_first_comma = $$rcomma_index[0];
my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
my $i_last_comma = $i_true_last_comma;
if ( $i_last_comma >= $max_index_to_go ) {
$i_last_comma = $$rcomma_index[ --$item_count - 1 ];
return if ( $item_count < 1 );
}
my $comma_count = $item_count;
my @item_lengths;
my @i_term_begin;
my @i_term_end;
my @i_term_comma;
my $i_prev_plus;
my @max_length = ( 0, 0 );
my $first_term_length;
my $i = $i_opening_paren;
my $is_odd = 1;
for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
$is_odd = 1 - $is_odd;
$i_prev_plus = $i + 1;
$i = $$rcomma_index[$j];
my $i_term_end =
( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
my $i_term_begin =
( $types_to_go[$i_prev_plus] eq 'b' )
? $i_prev_plus + 1
: $i_prev_plus;
push @i_term_begin, $i_term_begin;
push @i_term_end, $i_term_end;
push @i_term_comma, $i;
my $length =
2 + token_sequence_length( $i_term_begin, $i_term_end );
push @item_lengths, $length;
if ( $j == 0 ) {
$first_term_length = $length;
}
else {
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
}
}
my $i_b =
( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
? $i_last_comma + 1
: $i_last_comma;
my $i_e =
( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
? $i_closing_paren - 2
: $i_closing_paren - 1;
my $i_effective_last_comma = $i_last_comma;
my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
if ( $last_item_length > 0 ) {
$last_item_length += 2;
push @item_lengths, $last_item_length;
push @i_term_begin, $i_b + 1;
push @i_term_end, $i_e;
push @i_term_comma, undef;
my $i_odd = $item_count % 2;
if ( $last_item_length > $max_length[$i_odd] ) {
$max_length[$i_odd] = $last_item_length;
}
$item_count++;
$i_effective_last_comma = $i_e + 1;
if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
$identifier_count++;
}
}
if ( $has_broken_sublist[$depth] ) {
my $small_length = 10;
my $skipped_count = 0;
my $columns = table_columns_available($i_first_comma);
my $fields = int( $columns / $small_length );
if ( $rOpts_maximum_fields_per_table
&& $fields > $rOpts_maximum_fields_per_table )
{
$fields = $rOpts_maximum_fields_per_table;
}
my $max_skipped_count = $fields - 1;
my $is_simple_last_term = 0;
my $is_simple_next_term = 0;
foreach my $j ( 0 .. $item_count ) {
$is_simple_last_term = $is_simple_next_term;
$is_simple_next_term = 0;
if ( $j < $item_count
&& $i_term_end[$j] == $i_term_begin[$j]
&& $item_lengths[$j] <= $small_length )
{
$is_simple_next_term = 1;
}
next if $j == 0;
if ( $is_simple_last_term
&& $is_simple_next_term
&& $skipped_count < $max_skipped_count )
{
$skipped_count++;
}
else {
$skipped_count = 0;
my $i = $i_term_comma[ $j - 1 ];
last unless defined $i;
set_forced_breakpoint($i);
}
}
if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
return;
}
if ( $rOpts_break_at_old_comma_breakpoints
|| $interrupted
|| $i_opening_paren < 0 )
{
copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
return;
}
my $opening_token = $tokens_to_go[$i_opening_paren];
my $opening_environment =
$container_environment_to_go[$i_opening_paren];
my $i_opening_minus = find_token_starting_list($i_opening_paren);
return
unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
set_fake_breakpoint();
if ( $i_effective_last_comma >= $max_index_to_go ) {
$i_effective_last_comma = $max_index_to_go - 1;
}
my $need_lp_break_open = $must_break_open;
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
my $columns_if_unbroken = $rOpts_maximum_line_length -
total_line_length( $i_opening_minus, $i_opening_paren );
$need_lp_break_open =
( $max_length[0] > $columns_if_unbroken )
|| ( $max_length[1] > $columns_if_unbroken )
|| ( $first_term_length > $columns_if_unbroken );
}
my $odd_or_even = 2;
if ( $identifier_count >= $item_count - 1
|| $is_assignment{$next_nonblank_type}
|| ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
)
{
$odd_or_even = 1;
}
my $use_separate_first_term = (
$odd_or_even == 1 && $item_count > 3 && $first_term_length >
2 * $max_length[0] - 2 && $first_term_length >
2 * $max_length[1] - 2 );
if ( !$use_separate_first_term ) {
if ( $is_keyword_with_special_leading_term{$list_type} ) {
$use_separate_first_term = 1;
if ( $item_count < 3 ) {
if ( $i_first_comma - $i_opening_paren < 4 ) {
$$rdo_not_break_apart = 1;
}
}
elsif ($first_term_length < 20
&& $i_first_comma - $i_opening_paren < 4 )
{
my $columns = table_columns_available($i_first_comma);
if ( $first_term_length < $columns ) {
$$rdo_not_break_apart = 1;
}
}
}
}
if ($use_separate_first_term) {
$use_separate_first_term = 1;
set_forced_breakpoint($i_first_comma);
$i_opening_paren = $i_first_comma;
$i_first_comma = $$rcomma_index[1];
$item_count--;
return if $comma_count == 1;
shift @item_lengths;
shift @i_term_begin;
shift @i_term_end;
shift @i_term_comma;
}
else {
if ( $first_term_length > $max_length[0] ) {
$max_length[0] = $first_term_length;
}
}
my $pair_width = ( $max_length[0] + $max_length[1] );
my $max_width =
( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
my $columns = table_columns_available($i_first_comma);
my $number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
$pair_width );
my $number_of_fields = $number_of_fields_max;
my ( $number_of_fields_best, $ri_ragged_break_list,
$new_identifier_count )
= study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
$max_width );
if ( $number_of_fields_best != 0
&& $number_of_fields_best < $number_of_fields_max )
{
$number_of_fields = $number_of_fields_best;
}
if (
$rOpts_line_up_parentheses
&& (
$number_of_fields == 0
|| ( $number_of_fields == 1
&& $number_of_fields != $number_of_fields_best )
)
)
{
my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
if ( $available_spaces > 0 ) {
my $spaces_wanted = $max_width - $columns;
if ( $number_of_fields_best == 0 ) {
$number_of_fields_best =
get_maximum_fields_wanted( \@item_lengths );
}
if ( $number_of_fields_best != 1 ) {
my $spaces_wanted_2 =
1 + $pair_width - $columns; if ( $available_spaces > $spaces_wanted_2 ) {
$spaces_wanted = $spaces_wanted_2;
}
}
if ( $spaces_wanted > 0 ) {
my $deleted_spaces =
reduce_lp_indentation( $i_first_comma, $spaces_wanted );
if ( $deleted_spaces > 0 ) {
$columns = table_columns_available($i_first_comma);
$number_of_fields_max =
maximum_number_of_fields( $columns, $odd_or_even,
$max_width, $pair_width );
$number_of_fields = $number_of_fields_max;
if ( $number_of_fields_best == 1
&& $number_of_fields >= 1 )
{
$number_of_fields = $number_of_fields_best;
}
}
}
}
}
if ( $number_of_fields <= 0 ) {
$number_of_fields = int( $columns / $max_width );
}
if ( $rOpts_maximum_fields_per_table
&& $number_of_fields > $rOpts_maximum_fields_per_table )
{
$number_of_fields = $rOpts_maximum_fields_per_table;
}
my $packed_columns = token_sequence_length( $i_opening_paren + 1,
$i_effective_last_comma + 1 );
if ( $columns <= 0 ) { $columns = 1 } my $packed_lines = 1 + int( $packed_columns / $columns );
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
if ( $number_of_fields <= 0 ) {
my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
my $long_first_term =
excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
if (
$number_of_fields_best == 1
|| $in_hierarchical_list
|| ( $comma_count > 1
&& !( $long_last_term || $long_first_term ) )
)
{
foreach ( 0 .. $comma_count - 1 ) {
set_forced_breakpoint( $$rcomma_index[$_] );
}
}
elsif ($long_last_term) {
set_forced_breakpoint($i_last_comma);
$$rdo_not_break_apart = 1 unless $must_break_open;
}
elsif ($long_first_term) {
set_forced_breakpoint($i_first_comma);
}
else {
}
return;
}
my $formatted_lines = $item_count / ($number_of_fields);
if ( $formatted_lines != int $formatted_lines ) {
$formatted_lines = 1 + int $formatted_lines;
}
$number_of_fields =
compactify_table( $item_count, $number_of_fields, $formatted_lines,
$odd_or_even );
my $columns_per_line =
( int $number_of_fields / 2 ) * $pair_width +
( $number_of_fields % 2 ) * $max_width;
my $formatted_columns;
if ( $number_of_fields > 1 ) {
$formatted_columns =
( $pair_width * ( int( $item_count / 2 ) ) +
( $item_count % 2 ) * $max_width );
}
else {
$formatted_columns = $max_width * $item_count;
}
if ( $formatted_columns < $packed_columns ) {
$formatted_columns = $packed_columns;
}
my $unused_columns = $formatted_columns - $packed_columns;
my $sparsity = ($unused_columns) / ($formatted_columns);
my $max_allowed_sparsity =
( $item_count < 3 ) ? 0.1
: ( $packed_lines == 1 ) ? 0.15
: ( $packed_lines == 2 ) ? 0.4
: 0.7;
if (
$packed_lines <= 2 && $item_count < 9 && $opening_environment eq 'BLOCK' && $opening_token eq '(' )
{
if (
$rOpts_line_up_parentheses && $item_count == 2 && !$must_break_open
)
{
my $i_break = $$rcomma_index[0];
set_forced_breakpoint($i_break);
$$rdo_not_break_apart = 1;
set_non_alignment_flags( $comma_count, $rcomma_index );
return;
}
if (
( $number_of_fields == 2 && $item_count == 3 )
|| (
$new_identifier_count > 0 && $sparsity > 0.15
) )
{
my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
unless ($must_break_open) {
if ( $break_count <= 1 ) {
$$rdo_not_break_apart = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
$$rdo_not_break_apart = 1;
}
}
set_non_alignment_flags( $comma_count, $rcomma_index );
return;
}
}
FORMATTER_DEBUG_FLAG_SPARSE && do {
print
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
};
my $total_columns = table_columns_available($i_opening_paren);
my $too_long = $packed_columns > $total_columns;
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
$too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
my $i_opening_minus = $i_opening_paren - 4;
if ( $i_opening_minus >= 0 ) {
$too_long = excess_line_length( $i_opening_minus,
$i_effective_last_comma + 1 ) > 0;
}
}
my $must_break_open_container = $must_break_open
|| ( $too_long
&& ( $in_hierarchical_list || $opening_token ne '(' ) );
if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
|| ( $formatted_lines < 2 )
|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
)
{
if ( $packed_lines > 2 && $item_count > 10 ) {
write_logfile_entry("List sparse: using old breakpoints\n");
copy_old_breakpoints( $i_first_comma, $i_last_comma );
}
else {
my $break_count = set_ragged_breakpoints( \@i_term_comma,
$ri_ragged_break_list );
++$break_count if ($use_separate_first_term);
unless ($must_break_open_container) {
if ( $break_count <= 1 ) {
$$rdo_not_break_apart = 1;
}
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
{
$$rdo_not_break_apart = 1;
}
}
set_non_alignment_flags( $comma_count, $rcomma_index );
}
return;
}
write_logfile_entry(
"List: auto formatting with $number_of_fields fields/row\n");
my $j_first_break =
$use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
for (
my $j = $j_first_break ;
$j < $comma_count ;
$j += $number_of_fields
)
{
my $i = $$rcomma_index[$j];
set_forced_breakpoint($i);
}
return;
}
}
sub set_non_alignment_flags {
my ( $comma_count, $rcomma_index ) = @_;
foreach ( 0 .. $comma_count - 1 ) {
$matching_token_to_go[ $$rcomma_index[$_] ] = 1;
}
}
sub study_list_complexity {
my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
my $item_count = @{$ri_term_begin};
my $complex_item_count = 0;
my $number_of_fields_best = $rOpts_maximum_fields_per_table;
my $i_max = @{$ritem_lengths} - 1;
my $i_last_last_break = -3;
my $i_last_break = -2;
my @i_ragged_break_list;
my $definitely_complex = 30;
my $definitely_simple = 12;
my $quote_count = 0;
for my $i ( 0 .. $i_max ) {
my $ib = $ri_term_begin->[$i];
my $ie = $ri_term_end->[$i];
my $weighted_length = ( $ritem_lengths->[$i] - 2 );
my $is_quote = 0;
if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
$is_quote = 1;
$quote_count++;
}
elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
$quote_count++;
}
if ( $ib eq $ie ) {
if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
$complex_item_count++;
$weighted_length *= 2;
}
else {
}
}
else {
if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
$complex_item_count++;
$weighted_length *= 2;
}
if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
$weighted_length += 4;
}
}
$weighted_length += 2 * ( $ie - $ib );
if ( $weighted_length >= $definitely_complex ) {
if ( $i_last_break == $i - 1
&& $i > 1
&& $i_last_last_break != $i - 2 )
{
pop @i_ragged_break_list;
push @i_ragged_break_list, $i - 2;
push @i_ragged_break_list, $i - 1;
}
push @i_ragged_break_list, $i;
$i_last_last_break = $i_last_break;
$i_last_break = $i;
}
elsif ($i == $i_max
&& $i_last_break == $i - 1
&& $weighted_length <= $definitely_simple )
{
pop @i_ragged_break_list;
}
}
my $identifier_count = $i_max + 1 - $quote_count;
if ( $max_width > 12
&& $complex_item_count > $item_count / 2
&& $number_of_fields_best != 2 )
{
$number_of_fields_best = 1;
}
return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}
sub get_maximum_fields_wanted {
my ($ritem_lengths) = @_;
my $number_of_fields_best = 0;
my $item_count = @{$ritem_lengths};
if ( $item_count <= 5 ) {
$number_of_fields_best = 1;
}
else {
my $is_odd = 1;
my @max_length = ( 0, 0 );
my @last_length_2 = ( undef, undef );
my @first_length_2 = ( undef, undef );
my $last_length = undef;
my $total_variation_1 = 0;
my $total_variation_2 = 0;
my @total_variation_2 = ( 0, 0 );
for ( my $j = 0 ; $j < $item_count ; $j++ ) {
$is_odd = 1 - $is_odd;
my $length = $ritem_lengths->[$j];
if ( $length > $max_length[$is_odd] ) {
$max_length[$is_odd] = $length;
}
if ( defined($last_length) ) {
my $dl = abs( $length - $last_length );
$total_variation_1 += $dl;
}
$last_length = $length;
my $ll = $last_length_2[$is_odd];
if ( defined($ll) ) {
my $dl = abs( $length - $ll );
$total_variation_2[$is_odd] += $dl;
}
else {
$first_length_2[$is_odd] = $length;
}
$last_length_2[$is_odd] = $length;
}
$total_variation_2 = $total_variation_2[0] + $total_variation_2[1];
my $factor = ( $item_count > 10 ) ? 1 : ( $item_count > 5 ) ? 0.75 : 0;
unless ( $total_variation_2 < $factor * $total_variation_1 ) {
$number_of_fields_best = 1;
}
}
return ($number_of_fields_best);
}
sub table_columns_available {
my $i_first_comma = shift;
my $columns =
$rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);
$columns -= 1;
return $columns;
}
sub maximum_number_of_fields {
my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;
my $max_pairs = int( $columns / $pair_width );
my $number_of_fields = $max_pairs * 2;
if ( $odd_or_even == 1
&& $max_pairs * $pair_width + $max_width <= $columns )
{
$number_of_fields++;
}
return $number_of_fields;
}
sub compactify_table {
my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;
if ( $number_of_fields >= $odd_or_even * 2 && $formatted_lines > 0 ) {
my $min_fields;
for (
$min_fields = $number_of_fields ;
$min_fields >= $odd_or_even
&& $min_fields * $formatted_lines >= $item_count ;
$min_fields -= $odd_or_even
)
{
$number_of_fields = $min_fields;
}
}
return $number_of_fields;
}
sub set_ragged_breakpoints {
my ( $ri_term_comma, $ri_ragged_break_list ) = @_;
my $break_count = 0;
foreach (@$ri_ragged_break_list) {
my $j = $ri_term_comma->[$_];
if ($j) {
set_forced_breakpoint($j);
$break_count++;
}
}
return $break_count;
}
sub copy_old_breakpoints {
my ( $i_first_comma, $i_last_comma ) = @_;
for my $i ( $i_first_comma .. $i_last_comma ) {
if ( $old_breakpoint_to_go[$i] ) {
set_forced_breakpoint($i);
}
}
}
sub set_nobreaks {
my ( $i, $j ) = @_;
if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
);
};
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
}
else {
FORMATTER_DEBUG_FLAG_NOBREAK && do {
my ( $a, $b, $c ) = caller();
print(
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
);
};
}
}
sub set_fake_breakpoint {
$forced_breakpoint_count++;
}
sub set_forced_breakpoint {
my $i = shift;
return unless defined $i && $i >= 0;
my $token = $tokens_to_go[$i];
if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
}
elsif ( $is_if_unless{$token} ) { $i-- }
if ( $i >= 0 && $i <= $max_index_to_go ) {
my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;
FORMATTER_DEBUG_FLAG_FORCE && do {
my ( $a, $b, $c ) = caller();
print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
};
if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
$forced_breakpoint_to_go[$i_nonblank] = 1;
if ( $i_nonblank > $index_max_forced_break ) {
$index_max_forced_break = $i_nonblank;
}
$forced_breakpoint_count++;
$forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
$i_nonblank;
if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
set_closing_breakpoint($i_nonblank);
}
}
}
}
sub clear_breakpoint_undo_stack {
$forced_breakpoint_undo_count = 0;
}
sub undo_forced_breakpoint_stack {
my $i_start = shift;
if ( $i_start < 0 ) {
$i_start = 0;
my ( $a, $b, $c ) = caller();
warning(
"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
);
}
while ( $forced_breakpoint_undo_count > $i_start ) {
my $i =
$forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
if ( $i >= 0 && $i <= $max_index_to_go ) {
$forced_breakpoint_to_go[$i] = 0;
$forced_breakpoint_count--;
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
);
};
}
else {
FORMATTER_DEBUG_FLAG_UNDOBP && do {
my ( $a, $b, $c ) = caller();
print(
"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
);
};
}
}
}
{
my %is_amp_amp;
my %is_ternary;
my %is_math_op;
BEGIN {
@_ = qw( && || );
@is_amp_amp{@_} = (1) x scalar(@_);
@_ = qw( ? : );
@is_ternary{@_} = (1) x scalar(@_);
@_ = qw( + - * / );
@is_math_op{@_} = (1) x scalar(@_);
}
sub recombine_breakpoints {
my ( $ri_beg, $ri_end ) = @_;
my $more_to_do = 1;
my $nmax_last = @$ri_end;
while ($more_to_do) {
my $n_best = 0;
my $bs_best;
my $n;
my $nmax = @$ri_end - 1;
unless ( $nmax < $nmax_last ) {
die "Program bug-infinite loop in recombine breakpoints\n";
}
$nmax_last = $nmax;
$more_to_do = 0;
my $previous_outdentable_closing_paren;
my $leading_amp_count = 0;
my $this_line_is_semicolon_terminated;
for $n ( 1 .. $nmax ) {
my $ibeg_1 = $$ri_beg[ $n - 1 ];
my $iend_1 = $$ri_end[ $n - 1 ];
my $iend_2 = $$ri_end[$n];
my $ibeg_2 = $$ri_beg[$n];
my $ibeg_nmax = $$ri_beg[$nmax];
my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
my $bs_tweak = 0;
if ( $n == $nmax ) {
next if $types_to_go[$ibeg_2] eq '{';
$this_line_is_semicolon_terminated =
$types_to_go[$iend_2] eq ';'
|| ( $types_to_go[$iend_2] eq '#'
&& $iend_2 - $ibeg_2 >= 2
&& $types_to_go[ $iend_2 - 2 ] eq ';'
&& $types_to_go[ $iend_2 - 1 ] eq 'b' );
}
if ( $types_to_go[$iend_1] eq '}' ) {
$previous_outdentable_closing_paren =
$this_line_is_semicolon_terminated && $ibeg_1 == $iend_1 && $tokens_to_go[$iend_1] eq
')'
&& ( $leading_amp_count == 0
|| $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
&& $types_to_go[$ibeg_2] ne ':'
&& ( $nesting_depth_to_go[$iend_1] ==
$nesting_depth_to_go[$iend_2] + 1 );
next
unless (
$previous_outdentable_closing_paren
|| ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
);
}
elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
next unless $want_break_before{ $types_to_go[$iend_1] };
}
elsif ( $types_to_go[$iend_1] eq ':' ) {
next unless $want_break_before{ $types_to_go[$iend_1] };
}
elsif ( $types_to_go[$iend_1] eq '?' ) {
next
if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
next unless $types_to_go[$iend_2] eq ':';
}
elsif ( $types_to_go[$iend_1] eq ',' ) {
next if ( $old_breakpoint_to_go[$iend_1] );
if ( $types_to_go[$ibeg_1] eq '}'
&& $types_to_go[$ibeg_2] eq 'i' )
{
next
unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
&& ( $iend_2 == ( $ibeg_2 + 1 ) )
&& $this_line_is_semicolon_terminated );
$forced_breakpoint_to_go[$iend_1] = 0;
}
else {
next unless ( $n + 1 >= $nmax );
next
if (
$levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
my $saw_paren;
foreach my $ii ( $ibeg_2 .. $iend_2 ) {
if ( $tokens_to_go[$ii] eq '(' ) {
$saw_paren = 1;
last;
}
}
next if $saw_paren;
}
}
elsif ( $types_to_go[$iend_1] eq '(' ) {
}
elsif ( $types_to_go[$iend_1] eq ')' ) {
}
elsif ( $types_to_go[$iend_1] eq 'f' ) {
next;
}
elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
my $is_short_quote =
( $types_to_go[$ibeg_2] eq 'Q'
&& $ibeg_2 == $iend_2
&& length( $tokens_to_go[$ibeg_2] ) <
$rOpts_short_concatenation_item_length );
my $is_ternary =
( $types_to_go[$ibeg_1] eq '?'
&& ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
if ( $ibeg_1 != $iend_1
&& !$is_short_quote
&& !$is_ternary )
{
next
unless (
(
$nmax < $n + 2
|| ( $nmax == $n + 2
&& $types_to_go[$ibeg_nmax] eq ';' )
|| $types_to_go[$iend_2] eq 'h'
|| ( !$forced_breakpoint_to_go[$iend_1]
&& $types_to_go[$iend_2] eq '{' )
)
&& ( $ibeg_3 >= 0
&& $types_to_go[$ibeg_2] ne
$types_to_go[$ibeg_3] )
);
if ( !$rOpts_line_up_parentheses
|| $types_to_go[$iend_2] ne ',' )
{
my $tv = 0;
my $depth = $nesting_depth_to_go[$ibeg_2];
for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 1 );
}
$depth = $nesting_depth_to_go[$i];
}
if ( $tv > 0 ) {
next if ( $tv > 1 );
my $istop =
( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
if ( $nesting_depth_to_go[$i] != $depth ) {
$tv++;
last if ( $tv > 2 );
}
$depth = $nesting_depth_to_go[$i];
}
next if ( $tv > 2 );
}
}
}
unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
}
elsif ( $types_to_go[$iend_1] eq 'k' ) {
next
if (
$is_last_next_redo_return{ $tokens_to_go[$iend_1] }
&& $n < $nmax
);
if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
next
unless $want_break_before{ $tokens_to_go[$iend_1] };
}
}
elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
my $i_next_nonblank = $ibeg_2;
my $i_next_next = $i_next_nonblank + 1;
$i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
&& (
$i_next_nonblank == $iend_2
|| ( $i_next_next == $iend_2
&& $is_math_op{ $types_to_go[$i_next_next] } )
|| $types_to_go[$i_next_next] eq ';'
);
my $iend_1_minus = $iend_1;
$iend_1_minus--
if ( $iend_1_minus > $ibeg_1 );
$iend_1_minus--
if ( $types_to_go[$iend_1_minus] eq 'b'
&& $iend_1_minus > $ibeg_1 );
my $short_term_follows =
( $types_to_go[$iend_2] eq $types_to_go[$iend_1]
&& $types_to_go[$iend_1_minus] =~ /^[in]$/
&& $iend_2 <= $ibeg_2 + 2
&& length( $tokens_to_go[$ibeg_2] ) <
$rOpts_short_concatenation_item_length );
next
unless ( $number_follows || $short_term_follows );
}
if ($previous_outdentable_closing_paren) {
$forced_breakpoint_to_go[$iend_1] = 0;
}
elsif ( $types_to_go[$ibeg_2] eq ':' ) {
$leading_amp_count++;
next if $want_break_before{ $types_to_go[$ibeg_2] };
}
elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
$leading_amp_count++;
my $ok =
( $is_ternary{ $types_to_go[$ibeg_1] }
&& $tokens_to_go[$iend_2] eq '(' )
|| ( $ibeg_3 >= 0
&& $is_ternary{ $types_to_go[$ibeg_3] }
&& $nesting_depth_to_go[$ibeg_3] ==
$nesting_depth_to_go[$ibeg_2] );
next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
$forced_breakpoint_to_go[$iend_1] = 0;
$bs_tweak = 0.25;
}
elsif ( $types_to_go[$ibeg_2] eq '?' ) {
my $lev = $levels_to_go[$ibeg_2];
next if ( $lev ne $levels_to_go[$ibeg_1] );
my $follows_colon =
$ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
my $precedes_colon =
$ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
next unless ( $follows_colon || $precedes_colon );
if ( !$follows_colon ) {
my $local_count = 0;
foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
$local_count++
if $ii >= 0
&& $types_to_go[$ii] eq ':'
&& $levels_to_go[$ii] == $lev;
}
next unless ( $local_count > 1 );
}
$forced_breakpoint_to_go[$iend_1] = 0;
}
elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
my $i_next_nonblank = $ibeg_2 + 1;
if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
$i_next_nonblank++;
}
next
unless (
(
$n == 2
&& $n == $nmax
&& $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
)
|| ( $types_to_go[$i_next_nonblank] eq 'Q'
&& $i_next_nonblank >= $iend_2 - 1
&& length( $tokens_to_go[$i_next_nonblank] ) <
$rOpts_short_concatenation_item_length )
);
}
elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
next
unless (
$this_line_is_semicolon_terminated
&& (
$types_to_go[$ibeg_1] eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
&& ( $iend_2 - $ibeg_2 <= 7 )
)
);
}
elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
next
unless (
$this_line_is_semicolon_terminated
&& (
$types_to_go[$ibeg_1] eq 'k'
&& ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
|| $tokens_to_go[$ibeg_1] eq 'or' )
)
);
}
elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
next
unless (
$this_line_is_semicolon_terminated
&& $types_to_go[$ibeg_1] eq 'k'
&& $is_and_or{ $tokens_to_go[$ibeg_1] }
);
}
else {
unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
next
if ( ( $types_to_go[$iend_1] ne 'k' )
&& ( $tokens_to_go[$ibeg_2] ne 'while' ) );
}
}
}
elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
next
unless (
$this_line_is_semicolon_terminated
&& $types_to_go[$ibeg_1] eq 'k'
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
);
}
elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
my $i_next_nonblank = $ibeg_2 + 1;
if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
$i_next_nonblank++;
}
my $i_next_next = $i_next_nonblank + 1;
$i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
my $is_number = (
$types_to_go[$i_next_nonblank] eq 'n'
&& ( $i_next_nonblank >= $iend_2 - 1
|| $types_to_go[$i_next_next] eq ';' )
);
my $iend_1_nonblank =
$types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
my $iend_2_nonblank =
$types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
my $is_short_term =
( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
&& $types_to_go[$iend_2_nonblank] =~ /^[in]$/
&& $types_to_go[$iend_1_nonblank] =~ /^[in]$/
&& $iend_2_nonblank <= $ibeg_2 + 2
&& length( $tokens_to_go[$iend_2_nonblank] ) <
$rOpts_short_concatenation_item_length );
next
unless (
$is_number
|| $is_short_term
|| ( $n == 2
&& $n == $nmax
&& $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
);
}
elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
next unless $n == 1;
next
unless (
$nmax == 2
|| ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
|| $types_to_go[$iend_2] eq 'h'
);
}
next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
next
if excess_line_length( $ibeg_1, $iend_2 ) > 0;
if ( $n < $nmax ) {
my $if_next = $$ri_beg[ $n + 1 ];
next
if (
$levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
&& $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
&& !(
$n == 1
&& $iend_1 - $ibeg_1 <= 2
&& $types_to_go[$ibeg_1] eq 'k'
&& $tokens_to_go[$ibeg_1] eq 'if'
&& $tokens_to_go[$iend_1] ne '('
)
);
}
next if ( $bs == NO_BREAK );
if ( !$n_best ) {
$n_best = $n;
$bs_best = $bs;
}
else {
if ( $bs > $bs_best ) {
$n_best = $n;
$bs_best = $bs;
}
}
}
if ($n_best) {
splice @$ri_beg, $n_best, 1;
splice @$ri_end, $n_best - 1, 1;
$more_to_do++;
}
}
return ( $ri_beg, $ri_end );
}
}
sub break_all_chain_tokens {
my ( $ri_left, $ri_right ) = @_;
my %saw_chain_type;
my %left_chain_type;
my %right_chain_type;
my %interior_chain_type;
my $nmax = @$ri_right - 1;
my $count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $$ri_left[$n];
my $ir = $$ri_right[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
$typel = '+' if ( $typel eq '-' ); $typer = '+' if ( $typer eq '-' );
$typel = '*' if ( $typel eq '/' ); $typer = '*' if ( $typer eq '/' );
my $tokenl = $tokens_to_go[$il];
my $tokenr = $tokens_to_go[$ir];
if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
next if ( $typel eq '?' );
push @{ $left_chain_type{$typel} }, $il;
$saw_chain_type{$typel} = 1;
$count++;
}
if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
next if ( $typer eq '?' );
push @{ $right_chain_type{$typer} }, $ir;
$saw_chain_type{$typer} = 1;
$count++;
}
}
return unless $count;
$count = 0;
for my $n ( 0 .. $nmax ) {
my $il = $$ri_left[$n];
my $ir = $$ri_right[$n];
for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
my $type = $types_to_go[$i];
$type = '+' if ( $type eq '-' );
$type = '*' if ( $type eq '/' );
if ( $saw_chain_type{$type} ) {
push @{ $interior_chain_type{$type} }, $i;
$count++;
}
}
}
return unless $count;
my @insert_list;
foreach my $type ( keys %saw_chain_type ) {
last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
foreach my $itest ( @{ $interior_chain_type{$type} } ) {
if ( $left_chain_type{$type} ) {
next if $nobreak_to_go[ $itest - 1 ];
foreach my $i ( @{ $left_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest - 1;
if ( $type eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( $i_question > 0 ) {
push @insert_list, $i_question - 1;
}
}
last;
}
}
if ( $right_chain_type{$type} ) {
next if $nobreak_to_go[$itest];
foreach my $i ( @{ $right_chain_type{$type} } ) {
next unless in_same_container( $i, $itest );
push @insert_list, $itest;
if ( $type eq ':'
&& $levels_to_go[$i] != $levels_to_go[$itest] )
{
my $i_question = $mate_index_to_go[$itest];
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
last;
}
}
}
}
if (@insert_list) {
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
}
sub break_equals {
my ( $ri_left, $ri_right ) = @_;
my $nmax = @$ri_right - 1;
return unless ( $nmax >= 2 );
my $tokbeg = "";
my $depth_beg;
for my $n ( 1 .. 2 ) {
my $il = $$ri_left[$n];
my $typel = $types_to_go[$il];
my $tokenl = $tokens_to_go[$il];
my $has_leading_op = ( $tokenl =~ /^\w/ )
? $is_chain_operator{$tokenl} : $is_chain_operator{$typel}; return unless ($has_leading_op);
if ( $n > 1 ) {
return
unless ( $tokenl eq $tokbeg
&& $nesting_depth_to_go[$il] eq $depth_beg );
}
$tokbeg = $tokenl;
$depth_beg = $nesting_depth_to_go[$il];
}
my $il = $$ri_left[0];
my $ir = $$ri_right[0];
my @insert_list;
for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
my $type = $types_to_go[$i];
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
if ( $want_break_before{$type} ) {
push @insert_list, $i - 1;
}
else {
push @insert_list, $i;
}
}
}
my $i = 0;
if ( $types_to_go[$i] eq 'k'
&& $tokens_to_go[$i] eq 'return'
&& $ir > $il
&& $nesting_depth_to_go[$i] eq $depth_beg )
{
push @insert_list, $i;
}
return unless (@insert_list);
for my $n ( 1 .. 2 ) {
my $il = $$ri_left[$n];
my $ir = $$ri_right[$n];
for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
my $type = $types_to_go[$i];
return
if ( $is_assignment{$type}
&& $nesting_depth_to_go[$i] eq $depth_beg );
}
}
if (@insert_list) {
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
}
sub insert_final_breaks {
my ( $ri_left, $ri_right ) = @_;
my $nmax = @$ri_right - 1;
my $count = 0;
my $i_first_colon = -1;
for my $n ( 0 .. $nmax ) {
my $il = $$ri_left[$n];
my $ir = $$ri_right[$n];
my $typel = $types_to_go[$il];
my $typer = $types_to_go[$ir];
return if ( $typel eq '?' );
return if ( $typer eq '?' );
if ( $typel eq ':' ) { $i_first_colon = $il; last; }
elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
}
if ( $i_first_colon > 0 ) {
my $i_question = $mate_index_to_go[$i_first_colon];
if ( $i_question > 0 ) {
my @insert_list;
for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
my $token = $tokens_to_go[$ii];
my $type = $types_to_go[$ii];
if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
&& in_same_container( $ii, $i_question ) )
{
push @insert_list, $ii;
last;
}
}
if (@insert_list) {
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}
}
}
}
sub in_same_container {
my ( $i1, $i2 ) = @_;
my $type = $types_to_go[$i1];
my $depth = $nesting_depth_to_go[$i1];
return unless ( $nesting_depth_to_go[$i2] == $depth );
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
return if ( $i2-$i1 > 200 );
for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
next if ( $nesting_depth_to_go[$i] > $depth );
return if ( $nesting_depth_to_go[$i] < $depth );
my $tok = $tokens_to_go[$i];
$tok = ',' if $tok eq '=>';
if ( $type ne ':' ) {
return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
}
else {
return if ( $tok =~ /^[\,]$/ );
}
}
return 1;
}
sub set_continuation_breaks {
my $saw_good_break = shift;
my @i_first = (); my @i_last = (); my @i_colon_breaks = (); if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
set_bond_strengths();
my $imin = 0;
my $imax = $max_index_to_go;
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
my $i_begin = $imin;
my $leading_spaces = leading_spaces_to_go($imin);
my $line_count = 0;
my $last_break_strength = NO_BREAK;
my $i_last_break = -1;
my $max_bias = 0.001;
my $tiny_bias = 0.0001;
my $leading_alignment_token = "";
my $leading_alignment_type = "";
my $colons_in_order = 1;
my $last_tok = "";
my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
my $colon_count = @colon_list;
foreach (@colon_list) {
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
$last_tok = $_;
}
my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
while ( $i_begin <= $imax ) {
my $lowest_strength = NO_BREAK;
my $starting_sum = $lengths_to_go[$i_begin];
my $i_lowest = -1;
my $i_test = -1;
my $lowest_next_token = '';
my $lowest_next_type = 'b';
my $i_lowest_next_nonblank = -1;
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
my $type = $types_to_go[$i_test];
my $token = $tokens_to_go[$i_test];
my $next_type = $types_to_go[ $i_test + 1 ];
my $next_token = $tokens_to_go[ $i_test + 1 ];
my $i_next_nonblank =
( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
my $strength = $bond_strength_to_go[$i_test];
my $must_break = 0;
if (
(
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
|| ( $next_nonblank_type eq 'k'
&& $next_nonblank_token =~ /^(and|or)$/ )
)
&& ( $nesting_depth_to_go[$i_begin] >
$nesting_depth_to_go[$i_next_nonblank] )
)
{
set_forced_breakpoint($i_next_nonblank);
}
if (
$forced_breakpoint_to_go[$i_test]
|| ( $line_count
&& ( $token eq ')' )
&& ( $next_nonblank_type eq '{' )
&& ($next_nonblank_block_type)
&& !$rOpts->{'opening-brace-always-on-right'} )
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
)
{
if ( $strength < NO_BREAK ) {
$strength = $lowest_strength - $tiny_bias;
$must_break = 1;
}
}
if (
!$must_break
&& ( $next_nonblank_type =~ /^[\;\,]$/ )
&& (
(
$leading_spaces +
$lengths_to_go[ $i_next_nonblank + 1 ] -
$starting_sum
) > $rOpts_maximum_line_length
)
)
{
last if ( $i_lowest >= 0 );
}
if (
!$must_break
&& ( $i_test == $i_begin )
&& ( $i_test < $imax )
&& ( $token eq $type )
&& (
(
$leading_spaces +
$lengths_to_go[ $i_test + 1 ] -
$starting_sum
) <= $rOpts_maximum_line_length
)
)
{
$i_test++;
if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
$i_test++;
}
redo;
}
if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
{
last
if ($leading_alignment_type);
last
if (
$i_test == $imax && !$forced_breakpoint_count && $saw_good_break && $type =~ /^[ && $i_last_break < 0 && $i_lowest > 0 && $i_lowest < $imax - 1 && $strength - $lowest_strength < 0.5 * WEAK );
$lowest_strength = $strength;
$i_lowest = $i_test;
$lowest_next_token = $next_nonblank_token;
$lowest_next_type = $next_nonblank_type;
$i_lowest_next_nonblank = $i_next_nonblank;
last if $must_break;
if ( $line_count > 0
&& $i_test < $imax
&& ( $lowest_strength - $last_break_strength <= $max_bias )
)
{
my $i_last_end = $i_begin - 1;
if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
my $tok_beg = $tokens_to_go[$i_begin];
my $type_beg = $types_to_go[$i_begin];
if (
(
$tok_beg eq $next_nonblank_token
&& $is_chain_operator{$tok_beg}
&& ( $type_beg eq 'k'
|| $type_beg eq $tok_beg )
&& $nesting_depth_to_go[$i_begin] >=
$nesting_depth_to_go[$i_next_nonblank]
)
|| ( $tokens_to_go[$i_last_end] eq $token
&& $is_chain_operator{$token}
&& ( $type eq 'k' || $type eq $token )
&& $nesting_depth_to_go[$i_last_end] >=
$nesting_depth_to_go[$i_test] )
)
{
$leading_alignment_token = $next_nonblank_token;
$leading_alignment_type = $next_nonblank_type;
}
}
}
my $too_long =
( $i_test >= $imax )
? 1
: (
(
$leading_spaces +
$lengths_to_go[ $i_test + 2 ] -
$starting_sum
) > $rOpts_maximum_line_length
);
FORMATTER_DEBUG_FLAG_BREAK
&& print
"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
if ( $rOpts_fuzzy_line_length
&& $too_long
&& ( $i_lowest == $i_test )
&& ( length($token) > 1 )
&& ( $next_nonblank_type =~ /^[\;\,]$/ ) )
{
$too_long = 0;
}
last
if (
( $i_test == $imax ) || (
( $i_lowest >= 0 ) && $too_long
)
);
}
if ( $i_lowest < 0 ) { $i_lowest = $imax }
my $i_next_nonblank = (
( $types_to_go[ $i_lowest + 1 ] eq 'b' )
? $i_lowest + 2
: $i_lowest + 1
);
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
my $i;
foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
next unless ( $tokens_to_go[$i] eq '?' );
next if ($is_colon_chain);
next
if (
$tokens_to_go[$max_index_to_go] eq '#'
&& terminal_type( \@types_to_go, \@block_type_to_go, 0,
$max_index_to_go ) !~ /^[\;\}]$/
);
next
if ( $mate_index_to_go[$i] >= 0
&& $mate_index_to_go[$i] <= $i_next_nonblank );
$i_lowest = $i;
if ( $want_break_before{'?'} ) { $i_lowest-- }
last;
}
$i_next_nonblank = (
( $types_to_go[ $i_lowest + 1 ] eq 'b' )
? $i_lowest + 2
: $i_lowest + 1
);
$next_nonblank_type = $types_to_go[$i_next_nonblank];
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
FORMATTER_DEBUG_FLAG_BREAK
&& print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
if ( $next_nonblank_type eq '?' ) {
set_closing_breakpoint($i_next_nonblank);
}
elsif ( $types_to_go[$i_lowest] eq '?' ) {
set_closing_breakpoint($i_lowest);
}
if ( $next_nonblank_type eq ':' ) {
push @i_colon_breaks, $i_next_nonblank;
}
elsif ( $types_to_go[$i_lowest] eq ':' ) {
push @i_colon_breaks, $i_lowest;
}
$line_count++;
push( @i_first,
( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
push( @i_last,
( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
&& !$forced_breakpoint_to_go[$i_lowest] )
{
set_closing_breakpoint($i_lowest);
}
$i_begin = $i_lowest + 1;
$last_break_strength = $lowest_strength;
$i_last_break = $i_lowest;
$leading_alignment_token = "";
$leading_alignment_type = "";
$lowest_next_token = '';
$lowest_next_type = 'b';
if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
$i_begin++;
}
if ( $i_begin <= $imax ) {
$leading_spaces = leading_spaces_to_go($i_begin);
}
}
if (@i_colon_breaks) {
my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
unless ($is_chain) {
my @insert_list = ();
foreach (@i_colon_breaks) {
my $i_question = $mate_index_to_go[$_];
if ( $i_question >= 0 ) {
if ( $want_break_before{'?'} ) {
$i_question--;
if ( $i_question > 0
&& $types_to_go[$i_question] eq 'b' )
{
$i_question--;
}
}
if ( $i_question >= 0 ) {
push @insert_list, $i_question;
}
}
insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
}
}
}
return ( \@i_first, \@i_last, $colon_count );
}
sub insert_additional_breaks {
my ( $ri_break_list, $ri_first, $ri_last ) = @_;
my $i_f;
my $i_l;
my $line_number = 0;
my $i_break_left;
foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
$i_f = $$ri_first[$line_number];
$i_l = $$ri_last[$line_number];
while ( $i_break_left >= $i_l ) {
$line_number++;
if ( $line_number >= @$ri_last ) {
warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
);
report_definite_bug();
return;
}
$i_f = $$ri_first[$line_number];
$i_l = $$ri_last[$line_number];
}
my $i_break_right = $i_break_left + 1;
if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
if ( $i_break_left >= $i_f
&& $i_break_left < $i_l
&& $i_break_right > $i_f
&& $i_break_right <= $i_l )
{
splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
}
}
}
sub set_closing_breakpoint {
my $i_break = shift;
if ( $mate_index_to_go[$i_break] >= 0 ) {
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
}
}
else {
my $type_sequence = $type_sequence_to_go[$i_break];
if ($type_sequence) {
my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
$postponed_breakpoint{$type_sequence} = 1;
}
}
}
sub compare_indentation_levels {
my ( $python_indentation_level, $structural_indentation_level ) = @_;
if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
$last_tabbing_disagreement = $input_line_number;
if ($in_tabbing_disagreement) {
}
else {
$tabbing_disagreement_count++;
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
);
}
$in_tabbing_disagreement = $input_line_number;
$first_tabbing_disagreement = $in_tabbing_disagreement
unless ($first_tabbing_disagreement);
}
}
else {
if ($in_tabbing_disagreement) {
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
);
if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
write_logfile_entry(
"No further tabbing disagreements will be noted\n");
}
}
$in_tabbing_disagreement = 0;
}
}
}
package Perl::Tidy::IndentationItem;
use constant SPACES => 0; use constant LEVEL => 1; use constant CI_LEVEL => 2; use constant AVAILABLE_SPACES => 3; use constant CLOSED => 4; use constant COMMA_COUNT => 5; use constant SEQUENCE_NUMBER => 6; use constant INDEX => 7; use constant HAVE_CHILD => 8; use constant RECOVERABLE_SPACES => 9; use constant ALIGN_PAREN => 10; use constant MARKED => 11; use constant STACK_DEPTH => 12; use constant STARTING_INDEX => 13; use constant ARROW_COUNT => 14;
sub new {
my (
$class, $spaces, $level,
$ci_level, $available_spaces, $index,
$gnu_sequence_number, $align_paren, $stack_depth,
$starting_index,
) = @_;
my $closed = -1;
my $arrow_count = 0;
my $comma_count = 0;
my $have_child = 0;
my $want_right_spaces = 0;
my $marked = 0;
bless [
$spaces, $level, $ci_level,
$available_spaces, $closed, $comma_count,
$gnu_sequence_number, $index, $have_child,
$want_right_spaces, $align_paren, $marked,
$stack_depth, $starting_index, $arrow_count,
], $class;
}
sub permanently_decrease_AVAILABLE_SPACES {
my ( $item, $spaces_needed ) = @_;
my $available_spaces = $item->get_AVAILABLE_SPACES();
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
$item->decrease_AVAILABLE_SPACES($deleted_spaces);
$item->decrease_SPACES($deleted_spaces);
$item->set_RECOVERABLE_SPACES(0);
return $deleted_spaces;
}
sub tentatively_decrease_AVAILABLE_SPACES {
my ( $item, $spaces_needed ) = @_;
my $available_spaces = $item->get_AVAILABLE_SPACES();
my $deleted_spaces =
( $available_spaces > $spaces_needed )
? $spaces_needed
: $available_spaces;
$item->decrease_AVAILABLE_SPACES($deleted_spaces);
$item->decrease_SPACES($deleted_spaces);
$item->increase_RECOVERABLE_SPACES($deleted_spaces);
return $deleted_spaces;
}
sub get_STACK_DEPTH {
my $self = shift;
return $self->[STACK_DEPTH];
}
sub get_SPACES {
my $self = shift;
return $self->[SPACES];
}
sub get_MARKED {
my $self = shift;
return $self->[MARKED];
}
sub set_MARKED {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[MARKED] = $value;
}
return $self->[MARKED];
}
sub get_AVAILABLE_SPACES {
my $self = shift;
return $self->[AVAILABLE_SPACES];
}
sub decrease_SPACES {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[SPACES] -= $value;
}
return $self->[SPACES];
}
sub decrease_AVAILABLE_SPACES {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[AVAILABLE_SPACES] -= $value;
}
return $self->[AVAILABLE_SPACES];
}
sub get_ALIGN_PAREN {
my $self = shift;
return $self->[ALIGN_PAREN];
}
sub get_RECOVERABLE_SPACES {
my $self = shift;
return $self->[RECOVERABLE_SPACES];
}
sub set_RECOVERABLE_SPACES {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[RECOVERABLE_SPACES] = $value;
}
return $self->[RECOVERABLE_SPACES];
}
sub increase_RECOVERABLE_SPACES {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[RECOVERABLE_SPACES] += $value;
}
return $self->[RECOVERABLE_SPACES];
}
sub get_CI_LEVEL {
my $self = shift;
return $self->[CI_LEVEL];
}
sub get_LEVEL {
my $self = shift;
return $self->[LEVEL];
}
sub get_SEQUENCE_NUMBER {
my $self = shift;
return $self->[SEQUENCE_NUMBER];
}
sub get_INDEX {
my $self = shift;
return $self->[INDEX];
}
sub get_STARTING_INDEX {
my $self = shift;
return $self->[STARTING_INDEX];
}
sub set_HAVE_CHILD {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[HAVE_CHILD] = $value;
}
return $self->[HAVE_CHILD];
}
sub get_HAVE_CHILD {
my $self = shift;
return $self->[HAVE_CHILD];
}
sub set_ARROW_COUNT {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[ARROW_COUNT] = $value;
}
return $self->[ARROW_COUNT];
}
sub get_ARROW_COUNT {
my $self = shift;
return $self->[ARROW_COUNT];
}
sub set_COMMA_COUNT {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[COMMA_COUNT] = $value;
}
return $self->[COMMA_COUNT];
}
sub get_COMMA_COUNT {
my $self = shift;
return $self->[COMMA_COUNT];
}
sub set_CLOSED {
my ( $self, $value ) = @_;
if ( defined($value) ) {
$self->[CLOSED] = $value;
}
return $self->[CLOSED];
}
sub get_CLOSED {
my $self = shift;
return $self->[CLOSED];
}
package Perl::Tidy::VerticalAligner::Line;
{
use strict;
use Carp;
use constant JMAX => 0;
use constant JMAX_ORIGINAL_LINE => 1;
use constant RTOKENS => 2;
use constant RFIELDS => 3;
use constant RPATTERNS => 4;
use constant INDENTATION => 5;
use constant LEADING_SPACE_COUNT => 6;
use constant OUTDENT_LONG_LINES => 7;
use constant LIST_TYPE => 8;
use constant IS_HANGING_SIDE_COMMENT => 9;
use constant RALIGNMENTS => 10;
use constant MAXIMUM_LINE_LENGTH => 11;
use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
my %_index_map;
$_index_map{jmax} = JMAX;
$_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
$_index_map{rtokens} = RTOKENS;
$_index_map{rfields} = RFIELDS;
$_index_map{rpatterns} = RPATTERNS;
$_index_map{indentation} = INDENTATION;
$_index_map{leading_space_count} = LEADING_SPACE_COUNT;
$_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
$_index_map{list_type} = LIST_TYPE;
$_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
$_index_map{ralignments} = RALIGNMENTS;
$_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
$_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
my @_default_data = ();
$_default_data[JMAX] = undef;
$_default_data[JMAX_ORIGINAL_LINE] = undef;
$_default_data[RTOKENS] = undef;
$_default_data[RFIELDS] = undef;
$_default_data[RPATTERNS] = undef;
$_default_data[INDENTATION] = undef;
$_default_data[LEADING_SPACE_COUNT] = undef;
$_default_data[OUTDENT_LONG_LINES] = undef;
$_default_data[LIST_TYPE] = undef;
$_default_data[IS_HANGING_SIDE_COMMENT] = undef;
$_default_data[RALIGNMENTS] = [];
$_default_data[MAXIMUM_LINE_LENGTH] = undef;
$_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
{
my $_count = 0;
sub get_count { $_count; }
sub _increment_count { ++$_count }
sub _decrement_count { --$_count }
}
sub new {
my ( $caller, %arg ) = @_;
my $caller_is_obj = ref($caller);
my $class = $caller_is_obj || $caller;
no strict "refs";
my $self = bless [], $class;
$self->[RALIGNMENTS] = [];
my $index;
foreach ( keys %_index_map ) {
$index = $_index_map{$_};
if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
else { $self->[$index] = $_default_data[$index] }
}
$self->_increment_count();
return $self;
}
sub DESTROY {
$_[0]->_decrement_count();
}
sub get_jmax { $_[0]->[JMAX] }
sub get_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] }
sub get_rtokens { $_[0]->[RTOKENS] }
sub get_rfields { $_[0]->[RFIELDS] }
sub get_rpatterns { $_[0]->[RPATTERNS] }
sub get_indentation { $_[0]->[INDENTATION] }
sub get_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] }
sub get_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] }
sub get_list_type { $_[0]->[LIST_TYPE] }
sub get_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] }
sub get_rvertical_tightness_flags { $_[0]->[RVERTICAL_TIGHTNESS_FLAGS] }
sub set_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] ) }
sub get_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] }
sub get_alignments { @{ $_[0]->[RALIGNMENTS] } }
sub get_column { $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column() }
sub get_starting_column {
$_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
}
sub increment_column {
$_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
}
sub set_alignments { my $self = shift; @{ $self->[RALIGNMENTS] } = @_; }
sub current_field_width {
my $self = shift;
my ($j) = @_;
if ( $j == 0 ) {
return $self->get_column($j);
}
else {
return $self->get_column($j) - $self->get_column( $j - 1 );
}
}
sub field_width_growth {
my $self = shift;
my $j = shift;
return $self->get_column($j) - $self->get_starting_column($j);
}
sub starting_field_width {
my $self = shift;
my $j = shift;
if ( $j == 0 ) {
return $self->get_starting_column($j);
}
else {
return $self->get_starting_column($j) -
$self->get_starting_column( $j - 1 );
}
}
sub increase_field_width {
my $self = shift;
my ( $j, $pad ) = @_;
my $jmax = $self->get_jmax();
for my $k ( $j .. $jmax ) {
$self->increment_column( $k, $pad );
}
}
sub get_available_space_on_right {
my $self = shift;
my $jmax = $self->get_jmax();
return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
}
sub set_jmax { $_[0]->[JMAX] = $_[1] }
sub set_jmax_original_line { $_[0]->[JMAX_ORIGINAL_LINE] = $_[1] }
sub set_rtokens { $_[0]->[RTOKENS] = $_[1] }
sub set_rfields { $_[0]->[RFIELDS] = $_[1] }
sub set_rpatterns { $_[0]->[RPATTERNS] = $_[1] }
sub set_indentation { $_[0]->[INDENTATION] = $_[1] }
sub set_leading_space_count { $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
sub set_outdent_long_lines { $_[0]->[OUTDENT_LONG_LINES] = $_[1] }
sub set_list_type { $_[0]->[LIST_TYPE] = $_[1] }
sub set_is_hanging_side_comment { $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1] }
sub set_alignment { $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }
}
package Perl::Tidy::VerticalAligner::Alignment;
{
use strict;
use constant COLUMN => 0; use constant STARTING_COLUMN => 1; use constant MATCHING_TOKEN => 2; use constant STARTING_LINE => 3; use constant ENDING_LINE => 4; use constant SAVED_COLUMN => 5; use constant SERIAL_NUMBER => 6;
my %_index_map;
$_index_map{column} = COLUMN;
$_index_map{starting_column} = STARTING_COLUMN;
$_index_map{matching_token} = MATCHING_TOKEN;
$_index_map{starting_line} = STARTING_LINE;
$_index_map{ending_line} = ENDING_LINE;
$_index_map{saved_column} = SAVED_COLUMN;
$_index_map{serial_number} = SERIAL_NUMBER;
my @_default_data = ();
$_default_data[COLUMN] = undef;
$_default_data[STARTING_COLUMN] = undef;
$_default_data[MATCHING_TOKEN] = undef;
$_default_data[STARTING_LINE] = undef;
$_default_data[ENDING_LINE] = undef;
$_default_data[SAVED_COLUMN] = undef;
$_default_data[SERIAL_NUMBER] = undef;
{
my $_count = 0;
sub get_count { $_count; }
sub _increment_count { ++$_count }
sub _decrement_count { --$_count }
}
sub new {
my ( $caller, %arg ) = @_;
my $caller_is_obj = ref($caller);
my $class = $caller_is_obj || $caller;
no strict "refs";
my $self = bless [], $class;
foreach ( keys %_index_map ) {
my $index = $_index_map{$_};
if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
else { $self->[$index] = $_default_data[$index] }
}
$self->_increment_count();
return $self;
}
sub DESTROY {
$_[0]->_decrement_count();
}
sub get_column { return $_[0]->[COLUMN] }
sub get_starting_column { return $_[0]->[STARTING_COLUMN] }
sub get_matching_token { return $_[0]->[MATCHING_TOKEN] }
sub get_starting_line { return $_[0]->[STARTING_LINE] }
sub get_ending_line { return $_[0]->[ENDING_LINE] }
sub get_serial_number { return $_[0]->[SERIAL_NUMBER] }
sub set_column { $_[0]->[COLUMN] = $_[1] }
sub set_starting_column { $_[0]->[STARTING_COLUMN] = $_[1] }
sub set_matching_token { $_[0]->[MATCHING_TOKEN] = $_[1] }
sub set_starting_line { $_[0]->[STARTING_LINE] = $_[1] }
sub set_ending_line { $_[0]->[ENDING_LINE] = $_[1] }
sub increment_column { $_[0]->[COLUMN] += $_[1] }
sub save_column { $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
sub restore_column { $_[0]->[COLUMN] = $_[0]->[SAVED_COLUMN] }
}
package Perl::Tidy::VerticalAligner;
BEGIN {
use constant VALIGN_DEBUG_FLAG_APPEND => 0;
use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
my $debug_warning = sub {
print "VALIGN_DEBUGGING with key $_[0]\n";
};
VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
}
use vars qw(
$vertical_aligner_self
$current_line
$maximum_alignment_index
$ralignment_list
$maximum_jmax_seen
$minimum_jmax_seen
$previous_minimum_jmax_seen
$previous_maximum_jmax_seen
$maximum_line_index
$group_level
$group_type
$group_maximum_gap
$marginal_match
$last_group_level_written
$last_leading_space_count
$extra_indent_ok
$zero_count
@group_lines
$last_comment_column
$last_side_comment_line_number
$last_side_comment_length
$last_side_comment_level
$outdented_line_count
$first_outdented_line_at
$last_outdented_line_at
$diagnostics_object
$logger_object
$file_writer_object
@side_comment_history
$comment_leading_space_count
$is_matching_terminal_line
$cached_line_text
$cached_line_type
$cached_line_flag
$cached_seqno
$cached_line_valid
$cached_line_leading_space_count
$cached_seqno_string
$seqno_string
$last_nonblank_seqno_string
$rOpts
$rOpts_maximum_line_length
$rOpts_continuation_indentation
$rOpts_indent_columns
$rOpts_tabs
$rOpts_entab_leading_whitespace
$rOpts_valign
$rOpts_fixed_position_side_comment
$rOpts_minimum_space_to_comment
);
sub initialize {
my $class;
( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
= @_;
$ralignment_list = [];
$group_level = 0;
$last_group_level_written = -1;
$extra_indent_ok = 0; $last_side_comment_length = 0;
$maximum_jmax_seen = 0;
$minimum_jmax_seen = 0;
$previous_minimum_jmax_seen = 0;
$previous_maximum_jmax_seen = 0;
@group_lines = ();
$outdented_line_count = 0;
$first_outdented_line_at = 0;
$last_outdented_line_at = 0;
$last_side_comment_line_number = 0;
$last_side_comment_level = -1;
$is_matching_terminal_line = 0;
$side_comment_history[0] = [ -300, 0 ];
$side_comment_history[1] = [ -200, 0 ];
$side_comment_history[2] = [ -100, 0 ];
$cached_line_text = "";
$cached_line_type = 0;
$cached_line_flag = 0;
$cached_seqno = 0;
$cached_line_valid = 0;
$cached_line_leading_space_count = 0;
$cached_seqno_string = "";
$seqno_string = "";
$last_nonblank_seqno_string = "";
$rOpts_indent_columns = $rOpts->{'indent-columns'};
$rOpts_tabs = $rOpts->{'tabs'};
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
$rOpts_fixed_position_side_comment =
$rOpts->{'fixed-position-side-comment'};
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
$rOpts_valign = $rOpts->{'valign'};
forget_side_comment();
initialize_for_new_group();
$vertical_aligner_self = {};
bless $vertical_aligner_self, $class;
return $vertical_aligner_self;
}
sub initialize_for_new_group {
$maximum_line_index = -1; $maximum_alignment_index = -1; $zero_count = 0; $current_line = undef; $group_maximum_gap = 0; $group_type = "";
$marginal_match = 0;
$comment_leading_space_count = 0;
$last_leading_space_count = 0;
}
sub write_diagnostics {
if ($diagnostics_object) {
$diagnostics_object->write_diagnostics(@_);
}
}
sub warning {
if ($logger_object) {
$logger_object->warning(@_);
}
}
sub write_logfile_entry {
if ($logger_object) {
$logger_object->write_logfile_entry(@_);
}
}
sub report_definite_bug {
if ($logger_object) {
$logger_object->report_definite_bug();
}
}
sub get_SPACES {
my $indentation = shift;
return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}
sub get_RECOVERABLE_SPACES {
my $indentation = shift;
return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}
sub get_STACK_DEPTH {
my $indentation = shift;
return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
}
sub make_alignment {
my ( $col, $token ) = @_;
++$maximum_alignment_index;
my $alignment = new Perl::Tidy::VerticalAligner::Alignment(
column => $col,
starting_column => $col,
matching_token => $token,
starting_line => $maximum_line_index,
ending_line => $maximum_line_index,
serial_number => $maximum_alignment_index,
);
$ralignment_list->[$maximum_alignment_index] = $alignment;
return $alignment;
}
sub dump_alignments {
print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
for my $i ( 0 .. $maximum_alignment_index ) {
my $column = $ralignment_list->[$i]->get_column();
my $starting_column = $ralignment_list->[$i]->get_starting_column();
my $matching_token = $ralignment_list->[$i]->get_matching_token();
my $starting_line = $ralignment_list->[$i]->get_starting_line();
my $ending_line = $ralignment_list->[$i]->get_ending_line();
print
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
}
}
sub save_alignment_columns {
for my $i ( 0 .. $maximum_alignment_index ) {
$ralignment_list->[$i]->save_column();
}
}
sub restore_alignment_columns {
for my $i ( 0 .. $maximum_alignment_index ) {
$ralignment_list->[$i]->restore_column();
}
}
sub forget_side_comment {
$last_comment_column = 0;
}
sub append_line {
my (
$level, $level_end,
$indentation, $rfields,
$rtokens, $rpatterns,
$is_forced_break, $outdent_long_lines,
$is_terminal_ternary, $is_terminal_statement,
$do_not_pad, $rvertical_tightness_flags,
$level_jump,
) = @_;
my $jmax = $
my $leading_space_count = get_SPACES($indentation);
my $is_outdented = $last_leading_space_count > $leading_space_count;
$last_leading_space_count = $leading_space_count;
my $is_hanging_side_comment =
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
$is_outdented = 0 if $is_hanging_side_comment;
VALIGN_DEBUG_FLAG_APPEND0 && do {
print
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
};
if ($rvertical_tightness_flags) {
if ( $maximum_line_index <= 0
&& $cached_line_type
&& $cached_seqno
&& $rvertical_tightness_flags->[2]
&& $rvertical_tightness_flags->[2] == $cached_seqno )
{
$rvertical_tightness_flags->[3] ||= 1;
$cached_line_valid ||= 1;
}
}
if ( $cached_line_type == 3
&& $maximum_line_index < 0
&& $cached_line_flag < 2
&& $level_jump != 0 )
{
$cached_line_valid = 0;
}
if ($do_not_pad) { my_flush() }
if ( $level < 0 ) { $level = 0 }
if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
$extra_indent_ok =
( $level < $group_level && $last_group_level_written < $group_level );
my_flush();
$extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
$group_level = $level;
$leading_space_count = get_SPACES($indentation);
}
my $is_blank_line = "";
my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^ if ( $group_type eq 'COMMENT' ) {
if (
(
$is_block_comment
&& $outdent_long_lines
&& $leading_space_count == $comment_leading_space_count
)
|| $is_blank_line
)
{
$group_lines[ ++$maximum_line_index ] = $rfields->[0];
return;
}
else {
my_flush();
}
}
my $j_terminal_match;
if ( $is_terminal_ternary && $current_line ) {
$j_terminal_match =
fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
$jmax = @{$rfields} - 1;
}
if ( $rfields->[0] =~ /^else\s*$/
&& $current_line
&& $level_jump == 0 )
{
$j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
$jmax = @{$rfields} - 1;
}
if ( $jmax <= 0 ) {
$zero_count++;
if ( $maximum_line_index >= 0
&& !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
{
if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
elsif (
( $zero_count > 3 )
|| ( ( $leading_space_count + length( $$rfields[0] ) ) >
$group_lines[0]->get_column(0) )
)
{
my_flush();
}
}
if ( $is_block_comment
&& $outdent_long_lines
&& $maximum_line_index < 0 )
{
$group_type = 'COMMENT';
$comment_leading_space_count = $leading_space_count;
$group_lines[ ++$maximum_line_index ] = $rfields->[0];
return;
}
if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
{
write_leader_and_string( $leading_space_count, $$rfields[0], 0,
$outdent_long_lines, $rvertical_tightness_flags );
return;
}
}
else {
$zero_count = 0;
}
if ( $jmax > 0 && ( $ warning(
"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
);
report_definite_bug();
}
my $new_line = new Perl::Tidy::VerticalAligner::Line(
jmax => $jmax,
jmax_original_line => $jmax,
rtokens => $rtokens,
rfields => $rfields,
rpatterns => $rpatterns,
indentation => $indentation,
leading_space_count => $leading_space_count,
outdent_long_lines => $outdent_long_lines,
list_type => "",
is_hanging_side_comment => $is_hanging_side_comment,
maximum_line_length => $rOpts->{'maximum-line-length'},
rvertical_tightness_flags => $rvertical_tightness_flags,
);
my $col_matching_terminal = 0;
if ( defined($j_terminal_match) ) {
$col_matching_terminal = $current_line->get_column($j_terminal_match);
$is_matching_terminal_line = 1;
}
make_side_comment( $new_line, $level_end );
if ($is_forced_break) {
decide_if_list($new_line);
}
if ($current_line) {
join_hanging_comment( $new_line, $current_line )
if $is_hanging_side_comment;
eliminate_old_fields( $new_line, $current_line );
eliminate_new_fields( $new_line, $current_line );
check_match( $new_line, $current_line );
if ($current_line) {
check_fit( $new_line, $current_line );
}
}
accept_line($new_line);
$current_line = $new_line if ( $maximum_line_index == 0 );
if ( defined($j_terminal_match) ) {
if ( $maximum_line_index == 0 ) {
my $col_now = $current_line->get_column($j_terminal_match);
my $pad = $col_matching_terminal - $col_now;
my $padding_available =
$current_line->get_available_space_on_right();
if ( $pad > 0 && $pad <= $padding_available ) {
$current_line->increase_field_width( $j_terminal_match, $pad );
}
}
my_flush();
$is_matching_terminal_line = 0;
}
VALIGN_DEBUG_FLAG_APPEND && do {
print "APPEND fields:";
dump_array(@$rfields);
print "APPEND tokens:";
dump_array(@$rtokens);
print "APPEND patterns:";
dump_array(@$rpatterns);
dump_alignments();
};
return;
}
sub join_hanging_comment {
my $line = shift;
my $jmax = $line->get_jmax();
return 0 unless $jmax == 1; my $rtokens = $line->get_rtokens();
return 0 unless $$rtokens[0] eq '#'; my $rfields = $line->get_rfields();
return 0 unless $$rfields[0] =~ /^\s*$/; my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
return 0
unless $maximum_field_index > $jmax; my $rpatterns = $line->get_rpatterns();
$line->set_is_hanging_side_comment(1);
$jmax = $maximum_field_index;
$line->set_jmax($jmax);
$$rfields[$jmax] = $$rfields[1];
$$rtokens[ $jmax - 1 ] = $$rtokens[0];
$$rpatterns[ $jmax - 1 ] = $$rpatterns[0];
for ( my $j = 1 ; $j < $jmax ; $j++ ) {
$$rfields[$j] = " "; $$rtokens[ $j - 1 ] = "";
$$rpatterns[ $j - 1 ] = "";
}
return 1;
}
sub eliminate_old_fields {
my $new_line = shift;
my $jmax = $new_line->get_jmax();
if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }
return unless ( $maximum_line_index == 0 );
my $old_line = shift;
my $maximum_field_index = $old_line->get_jmax();
return unless $maximum_field_index > $jmax;
my $case = 1;
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $rpatterns = $new_line->get_rpatterns();
my $old_rpatterns = $old_line->get_rpatterns();
if ( $rtokens->[0] =~ /^=\d*$/
&& $old_rtokens->[0] eq $rtokens->[0]
&& $old_rpatterns->[0] eq $rpatterns->[0] )
{
$case = 2;
}
return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );
my $old_rfields = $old_line->get_rfields();
return
if ( $case == 1
&& length( $$old_rfields[$maximum_field_index] ) == 0 );
my $rfields = $new_line->get_rfields();
my $hid_equals = 0;
my @new_alignments = ();
my @new_fields = ();
my @new_matching_patterns = ();
my @new_matching_tokens = ();
my $j = 0;
my $k;
my $current_field = '';
my $current_pattern = '';
my $in_match = 0;
for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
$current_field .= $$old_rfields[$k];
$current_pattern .= $$old_rpatterns[$k];
last if ( $j > $jmax - 1 );
if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {
$in_match = 1;
$new_fields[$j] = $current_field;
$new_matching_patterns[$j] = $current_pattern;
$current_field = '';
$current_pattern = '';
$new_matching_tokens[$j] = $$old_rtokens[$k];
$new_alignments[$j] = $old_line->get_alignment($k);
$j++;
}
else {
if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
last if ( $case == 2 ); $hid_equals = 1;
}
last
if ( $in_match && $case == 1 )
; }
}
if ( ( $j == $jmax )
&& ( $current_field eq '' )
&& ( $case != 1 || $hid_equals ) )
{
$k = $maximum_field_index;
$current_field .= $$old_rfields[$k];
$current_pattern .= $$old_rpatterns[$k];
$new_fields[$j] = $current_field;
$new_matching_patterns[$j] = $current_pattern;
$new_alignments[$j] = $old_line->get_alignment($k);
$maximum_field_index = $j;
$old_line->set_alignments(@new_alignments);
$old_line->set_jmax($jmax);
$old_line->set_rtokens( \@new_matching_tokens );
$old_line->set_rfields( \@new_fields );
$old_line->set_rpatterns( \@$rpatterns );
}
}
sub make_side_comment {
my $new_line = shift;
my $level_end = shift;
my $jmax = $new_line->get_jmax();
my $rtokens = $new_line->get_rtokens();
if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
my $rfields = $new_line->get_rfields();
my $rpatterns = $new_line->get_rpatterns();
$$rtokens[$jmax] = '#';
$$rfields[ ++$jmax ] = '';
$$rpatterns[$jmax] = '#';
$new_line->set_jmax($jmax);
$new_line->set_jmax_original_line($jmax);
}
else {
my $line_number = $vertical_aligner_self->get_output_line_number();
my $rfields = $new_line->get_rfields();
if (
$line_number - $last_side_comment_line_number > 12
|| ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
)
{
forget_side_comment();
}
$last_side_comment_line_number = $line_number;
$last_side_comment_level = $level_end;
}
}
sub decide_if_list {
my $line = shift;
my $rtokens = $line->get_rtokens();
my $test_token = $$rtokens[0];
if ( $test_token =~ /^(\,|=>)/ ) {
my $list_type = $test_token;
my $jmax = $line->get_jmax();
foreach ( 1 .. $jmax - 2 ) {
if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {
$list_type = "";
last;
}
}
$line->set_list_type($list_type);
}
}
sub eliminate_new_fields {
return unless ( $maximum_line_index >= 0 );
my ( $new_line, $old_line ) = @_;
my $jmax = $new_line->get_jmax();
my $old_rtokens = $old_line->get_rtokens();
my $rtokens = $new_line->get_rtokens();
my $is_assignment =
( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );
return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );
my $maximum_field_index = $old_line->get_jmax();
return unless ( $maximum_field_index < $jmax );
unless ($is_assignment) {
return
unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
;
return
unless ( $maximum_field_index > 1 )
&& ( $new_line->get_list_type() !~ /^,/ );
}
my $rfields = $new_line->get_rfields();
my $rpatterns = $new_line->get_rpatterns();
my $old_rpatterns = $old_line->get_rpatterns();
my $match = 1;
my $k;
for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
if ( ( $$old_rtokens[$k] ne $$rtokens[$k] )
|| ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
{
$match = 0;
last;
}
}
if ($match) {
for $k ( $maximum_field_index .. $jmax - 1 ) {
$$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
$$rfields[$k] = "";
$$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
$$rpatterns[$k] = "";
}
$$rtokens[ $maximum_field_index - 1 ] = '#';
$$rfields[$maximum_field_index] = $$rfields[$jmax];
$$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
$jmax = $maximum_field_index;
}
$new_line->set_jmax($jmax);
}
sub fix_terminal_ternary {
my ( $rfields, $rtokens, $rpatterns ) = @_;
my $jmax = @{$rfields} - 1;
my $old_line = $group_lines[$maximum_line_index];
my $rfields_old = $old_line->get_rfields();
my $rpatterns_old = $old_line->get_rpatterns();
my $rtokens_old = $old_line->get_rtokens();
my $maximum_field_index = $old_line->get_jmax();
my ($jquestion);
my $depth_question;
my $pad = "";
for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
my $tok = $rtokens_old->[$j];
if ( $tok =~ /^\?(\d+)$/ ) {
$depth_question = $1;
next unless ( $depth_question eq $group_level );
$jquestion = $j;
if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
$pad = " " x length($1);
}
else {
return; }
last;
}
}
return unless ( defined($jquestion) );
my $jadd = $jquestion;
my @fields = @{$rfields};
my @patterns = @{$rpatterns};
my @tokens = @{$rtokens};
VALIGN_DEBUG_FLAG_TERNARY && do {
local $" = '><';
print "CURRENT FIELDS=<@{$rfields_old}>\n";
print "CURRENT TOKENS=<@{$rtokens_old}>\n";
print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
print "UNMODIFIED FIELDS=<@{$rfields}>\n";
print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
};
# handle cases of leading colon on this line
if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
my ( $colon, $therest ) = ( $1, $2 );
# Handle sub-case of first field with leading colon plus additional code
# This is the usual situation as at the '1' below:
# ...
# : $year % 400 ? 0
# : 1;
if ($therest) {
# Split the first field after the leading colon and insert padding.
# Note that this padding will remain even if the terminal value goes
# out on a separate line. This does not seem to look to bad, so no
# mechanism has been included to undo it.
my $field1 = shift @fields;
unshift @fields, ( $colon, $pad . $therest );
# change the leading pattern from : to ?
return unless ( $patterns[0] =~ s/^\:/?/ );
# install leading tokens and patterns of existing line
unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
}
# handle sub-case of first field just equal to leading colon.
# This can happen for example in the example below where
# the leading '(' would create a new alignment token
# : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
# : ( $mname = $name . '->' );
else {
return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
# prepend a leading ? onto the second pattern
$patterns[1] = "?b" . $patterns[1];
# pad the second field
$fields[1] = $pad . $fields[1];
# install leading tokens and patterns of existing line, replacing
# leading token and inserting appropriate number of empty fields
splice( @tokens, 0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
}
}
# Handle case of no leading colon on this line. This will
# be the case when -wba=':' is used. For example,
# $year % 400 ? 0 :
# 1;
else {
# install leading tokens and patterns of existing line
$patterns[0] = '?' . 'b' . $patterns[0];
unshift( @tokens, @{$rtokens_old}[ 0 .. $jquestion ] );
unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
# insert appropriate number of empty fields
$jadd = $jquestion + 1;
$fields[0] = $pad . $fields[0];
splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
}
VALIGN_DEBUG_FLAG_TERNARY && do {
local $" = '><';
print "MODIFIED TOKENS=<@tokens>\n";
print "MODIFIED PATTERNS=<@patterns>\n";
print "MODIFIED FIELDS=<@fields>\n";
};
@{$rfields} = @fields;
@{$rtokens} = @tokens;
@{$rpatterns} = @patterns;
return $jquestion;
}
sub fix_terminal_else {
my ( $rfields, $rtokens, $rpatterns ) = @_;
my $jmax = @{$rfields} - 1;
return unless ( $jmax > 0 );
my $rfields_old = $current_line->get_rfields();
return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
my $tok_brace = $rtokens->[0];
my $depth_brace;
if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
else { return }
my $rpatterns_old = $current_line->get_rpatterns();
my $rtokens_old = $current_line->get_rtokens();
my $maximum_field_index = $current_line->get_jmax();
my $jparen = 0;
my $tok_paren = '(' . $depth_brace;
my $tok_test = $rtokens_old->[$jparen];
return unless ( $tok_test eq $tok_paren );
my ($jbrace);
for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
my $tok = $rtokens_old->[$j];
if ( $tok eq $tok_brace ) {
$jbrace = $j;
last;
}
}
return unless ( defined($jbrace) );
my $jadd = $jbrace - $jparen;
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
splice( @{$rfields}, 1, 0, ('') x $jadd );
return $jbrace
unless ( $rfields_old->[0] =~ /^case\s*$/ );
}
{ my %is_good_alignment;
BEGIN {
@_ = qw( { ? => = );
push @_, (',');
@is_good_alignment{@_} = (1) x scalar(@_);
}
sub check_match {
my $new_line = shift;
my $old_line = shift;
my $jmax = $new_line->get_jmax();
my $maximum_field_index = $old_line->get_jmax();
if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
if (
( $maximum_field_index > $jmax ) && (
( $previous_minimum_jmax_seen <
$jmax ) || ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
)
)
{
goto NO_MATCH;
}
my $jmax_original_line = $new_line->get_jmax_original_line();
my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
my $rtokens = $new_line->get_rtokens();
my $rfields = $new_line->get_rfields();
my $rpatterns = $new_line->get_rpatterns();
my $list_type = $new_line->get_list_type();
my $group_list_type = $old_line->get_list_type();
my $old_rpatterns = $old_line->get_rpatterns();
my $old_rtokens = $old_line->get_rtokens();
my $jlimit = $jmax - 1;
if ( $maximum_field_index > $jmax ) {
$jlimit = $jmax_original_line;
--$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
}
if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
for my $j ( 0 .. $jlimit ) {
my $old_tok = $$old_rtokens[$j];
next unless $old_tok;
my $new_tok = $$rtokens[$j];
next unless $new_tok;
goto NO_MATCH
if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
|| $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
}
}
elsif ( !$is_hanging_side_comment ) {
my $leading_space_count = $new_line->get_leading_space_count();
my $max_pad = 0;
my $min_pad = 0;
my $saw_good_alignment;
for my $j ( 0 .. $jlimit ) {
my $old_tok = $$old_rtokens[$j];
my $new_tok = $$rtokens[$j];
my $alignment_token = $new_tok;
if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
my $tokens_match = $new_tok eq $old_tok
|| ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
if ( !$tokens_match ) {
if (
$j == $jlimit
&& ( $j > 0 || $old_tok =~ /^,/ )
)
{
$marginal_match = 1
if ( $marginal_match == 0
&& $maximum_line_index == 0 );
last;
}
goto NO_MATCH;
}
my $pad =
length( $$rfields[$j] ) - $old_line->current_field_width($j);
if ( $j == 0 ) { $pad += $leading_space_count; }
if ( $alignment_token ne '#' ) {
if ( $pad > $max_pad ) { $max_pad = $pad }
if ( $pad < $min_pad ) { $min_pad = $pad }
}
if ( $is_good_alignment{$alignment_token} ) {
$saw_good_alignment = 1;
}
if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
$marginal_match = 1
if ( $marginal_match == 0 && $maximum_line_index == 0 );
if ( $alignment_token eq ',' ) {
goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
}
elsif ( $alignment_token eq '(' ) {
if ( $pad != 0 ) { goto NO_MATCH }
}
elsif ( $alignment_token eq '=' ) {
if (
substr( $$old_rpatterns[$j], 0, 1 ) ne
substr( $$rpatterns[$j], 0, 1 ) )
{
goto NO_MATCH;
}
elsif ( $maximum_line_index == 0 ) {
$marginal_match =
2; }
}
}
if ( $maximum_field_index > $jmax ) {
if ( $pad > 0 ) { goto NO_MATCH; }
}
}
if ( $marginal_match == 1
&& $jmax == $maximum_field_index
&& ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
)
{
$marginal_match = 0;
}
}
if ( $maximum_field_index > $jmax ) {
my $comment = $$rfields[$jmax];
for $jmax ( $jlimit .. $maximum_field_index ) {
$$rtokens[$jmax] = $$old_rtokens[$jmax];
$$rfields[ ++$jmax ] = '';
$$rpatterns[$jmax] = $$old_rpatterns[$jmax];
}
$$rfields[$jmax] = $comment;
$new_line->set_jmax($jmax);
}
return;
NO_MATCH:
my_flush();
return;
}
}
sub check_fit {
return unless ( $maximum_line_index >= 0 );
my $new_line = shift;
my $old_line = shift;
my $jmax = $new_line->get_jmax();
my $leading_space_count = $new_line->get_leading_space_count();
my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
my $rtokens = $new_line->get_rtokens();
my $rfields = $new_line->get_rfields();
my $rpatterns = $new_line->get_rpatterns();
my $group_list_type = $group_lines[0]->get_list_type();
my $padding_so_far = 0;
my $padding_available = $old_line->get_available_space_on_right();
save_alignment_columns();
my ( $j, $pad, $eight );
my $maximum_field_index = $old_line->get_jmax();
for $j ( 0 .. $jmax ) {
$pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
if ( $j == 0 ) {
$pad += $leading_space_count;
}
if ( $pad < 0
&& $group_maximum_gap < -$pad
&& $j > 0
&& $j < $jmax - 1 )
{
$group_maximum_gap = -$pad;
}
next if $pad < 0;
if (
( $pad > $padding_available )
)
{
restore_alignment_columns();
my_flush();
last;
}
next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
$old_line->increase_field_width( $j, $pad );
$padding_available -= $pad;
if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
$group_maximum_gap = $pad;
}
}
}
sub accept_line {
my $new_line = shift;
$group_lines[ ++$maximum_line_index ] = $new_line;
if ( $maximum_line_index == 0 ) {
my $jmax = $new_line->get_jmax();
my $rfields = $new_line->get_rfields();
my $rtokens = $new_line->get_rtokens();
my $j;
my $col = $new_line->get_leading_space_count();
for $j ( 0 .. $jmax ) {
$col += length( $$rfields[$j] );
my $token = "";
if ( $j < $jmax ) { $token = $$rtokens[$j] }
my $alignment = make_alignment( $col, $token );
$new_line->set_alignment( $j, $alignment );
}
$maximum_jmax_seen = $jmax;
$minimum_jmax_seen = $jmax;
}
else {
my @new_alignments =
$group_lines[ $maximum_line_index - 1 ]->get_alignments();
$new_line->set_alignments(@new_alignments);
}
$previous_minimum_jmax_seen = $minimum_jmax_seen;
$previous_maximum_jmax_seen = $maximum_jmax_seen;
}
sub dump_array {
local $" = ')(';
print "(@_)\n";
}
# flush() sends the current Perl::Tidy::VerticalAligner group down the
# pipeline to Perl::Tidy::FileWriter.
# This is the external flush, which also empties the cache
sub flush {
if ( $maximum_line_index < 0 ) {
if ($cached_line_type) {
$seqno_string = $cached_seqno_string;
entab_and_output( $cached_line_text,
$cached_line_leading_space_count,
$last_group_level_written );
$cached_line_type = 0;
$cached_line_text = "";
$cached_seqno_string = "";
}
}
else {
my_flush();
}
}
# This is the internal flush, which leaves the cache intact
sub my_flush {
return if ( $maximum_line_index < 0 );
# handle a group of comment lines
if ( $group_type eq 'COMMENT' ) {
VALIGN_DEBUG_FLAG_APPEND0 && do {
my ( $a, $b, $c ) = caller();
print
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
};
my $leading_space_count = $comment_leading_space_count;
my $leading_string = get_leading_string($leading_space_count);
# zero leading space count if any lines are too long
my $max_excess = 0;
for my $i ( 0 .. $maximum_line_index ) {
my $str = $group_lines[$i];
my $excess =
length($str) + $leading_space_count - $rOpts_maximum_line_length;
if ( $excess > $max_excess ) {
$max_excess = $excess;
}
}
if ( $max_excess > 0 ) {
$leading_space_count -= $max_excess;
if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
$last_outdented_line_at =
$file_writer_object->get_output_line_number();
unless ($outdented_line_count) {
$first_outdented_line_at = $last_outdented_line_at;
}
$outdented_line_count += ( $maximum_line_index + 1 );
}
# write the group of lines
my $outdent_long_lines = 0;
for my $i ( 0 .. $maximum_line_index ) {
write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
$outdent_long_lines, "" );
}
}
# handle a group of code lines
else {
VALIGN_DEBUG_FLAG_APPEND0 && do {
my $group_list_type = $group_lines[0]->get_list_type();
my ( $a, $b, $c ) = caller();
my $maximum_field_index = $group_lines[0]->get_jmax();
print
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
};
# some small groups are best left unaligned
my $do_not_align = decide_if_aligned();
# optimize side comment location
$do_not_align = adjust_side_comment($do_not_align);
# recover spaces for -lp option if possible
my $extra_leading_spaces = get_extra_leading_spaces();
# all lines of this group have the same basic leading spacing
my $group_leader_length = $group_lines[0]->get_leading_space_count();
# add extra leading spaces if helpful
my $min_ci_gap = improve_continuation_indentation( $do_not_align,
$group_leader_length );
# loop to output all lines
for my $i ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$i];
write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
$group_leader_length, $extra_leading_spaces );
}
}
initialize_for_new_group();
}
sub decide_if_aligned {
# Do not try to align two lines which are not really similar
return unless $maximum_line_index == 1;
return if ($is_matching_terminal_line);
my $group_list_type = $group_lines[0]->get_list_type();
my $do_not_align = (
# always align lists
!$group_list_type
&& (
# don't align if it was just a marginal match
$marginal_match
# don't align two lines with big gap
|| $group_maximum_gap > 12
# or lines with differing number of alignment tokens
# TODO: this could be improved. It occasionally rejects
# good matches.
|| $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
)
);
# But try to convert them into a simple comment group if the first line
# a has side comment
my $rfields = $group_lines[0]->get_rfields();
my $maximum_field_index = $group_lines[0]->get_jmax();
if ( $do_not_align
&& ( $maximum_line_index > 0 )
&& ( length( $$rfields[$maximum_field_index] ) > 0 ) )
{
combine_fields();
$do_not_align = 0;
}
return $do_not_align;
}
sub adjust_side_comment {
my $do_not_align = shift;
# let's see if we can move the side comment field out a little
# to improve readability (the last field is always a side comment field)
my $have_side_comment = 0;
my $first_side_comment_line = -1;
my $maximum_field_index = $group_lines[0]->get_jmax();
for my $i ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$i];
if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
$have_side_comment = 1;
$first_side_comment_line = $i;
last;
}
}
my $kmax = $maximum_field_index + 1;
if ($have_side_comment) {
my $line = $group_lines[0];
# the maximum space without exceeding the line length:
my $avail = $line->get_available_space_on_right();
# try to use the previous comment column
my $side_comment_column = $line->get_column( $kmax - 2 );
my $move = $last_comment_column - $side_comment_column;
## my $sc_line0 = $side_comment_history[0]->[0];
## my $sc_col0 = $side_comment_history[0]->[1];
## my $sc_line1 = $side_comment_history[1]->[0];
## my $sc_col1 = $side_comment_history[1]->[1];
## my $sc_line2 = $side_comment_history[2]->[0];
## my $sc_col2 = $side_comment_history[2]->[1];
##
## # FUTURE UPDATES:
## # Be sure to ignore 'do not align' and '} # end comments'
## # Find first $move > 0 and $move <= $avail as follows:
## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
## # 2. try sc_col2 if (line-sc_line2) < 12
## # 3. try min possible space, plus up to 8,
## # 4. try min possible space
if ( $kmax > 0 && !$do_not_align ) {
# but if this doesn't work, give up and use the minimum space
if ( $move > $avail ) {
$move = $rOpts_minimum_space_to_comment - 1;
}
# but we want some minimum space to the comment
my $min_move = $rOpts_minimum_space_to_comment - 1;
if ( $move >= 0
&& $last_side_comment_length > 0
&& ( $first_side_comment_line == 0 )
&& $group_level == $last_group_level_written )
{
$min_move = 0;
}
if ( $move < $min_move ) {
$move = $min_move;
}
# prevously, an upper bound was placed on $move here,
# (maximum_space_to_comment), but it was not helpful
# don't exceed the available space
if ( $move > $avail ) { $move = $avail }
# we can only increase space, never decrease
if ( $move > 0 ) {
$line->increase_field_width( $maximum_field_index - 1, $move );
}
# remember this column for the next group
$last_comment_column = $line->get_column( $kmax - 2 );
}
else {
# try to at least line up the existing side comment location
if ( $kmax > 0 && $move > 0 && $move < $avail ) {
$line->increase_field_width( $maximum_field_index - 1, $move );
$do_not_align = 0;
}
# reset side comment column if we can't align
else {
forget_side_comment();
}
}
}
return $do_not_align;
}
sub improve_continuation_indentation {
my ( $do_not_align, $group_leader_length ) = @_;
# See if we can increase the continuation indentation
# to move all continuation lines closer to the next field
# (unless it is a comment).
#
# '$min_ci_gap'is the extra indentation that we may need to introduce.
# We will only introduce this to fields which already have some ci.
# Without this variable, we would occasionally get something like this
# (Complex.pm):
#
# use overload '+' => \&plus,
# '-' => \&minus,
# '*' => \&multiply,
# ...
# 'tan' => \&tan,
# 'atan2' => \&atan2,
#
# Whereas with this variable, we can shift variables over to get this:
#
# use overload '+' => \&plus,
# '-' => \&minus,
# '*' => \&multiply,
# ...
# 'tan' => \&tan,
# 'atan2' => \&atan2,
## BUB: Deactivated####################
# The trouble with this patch is that it may, for example,
# move in some 'or's or ':'s, and leave some out, so that the
# left edge alignment suffers.
return 0;
###########################################
my $maximum_field_index = $group_lines[0]->get_jmax();
my $min_ci_gap = $rOpts_maximum_line_length;
if ( $maximum_field_index > 1 && !$do_not_align ) {
for my $i ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$i];
my $leading_space_count = $line->get_leading_space_count();
my $rfields = $line->get_rfields();
my $gap =
$line->get_column(0) -
$leading_space_count -
length( $$rfields[0] );
if ( $leading_space_count > $group_leader_length ) {
if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
}
}
if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
$min_ci_gap = 0;
}
}
else {
$min_ci_gap = 0;
}
return $min_ci_gap;
}
sub write_vertically_aligned_line {
my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
$extra_leading_spaces )
= @_;
my $rfields = $line->get_rfields();
my $leading_space_count = $line->get_leading_space_count();
my $outdent_long_lines = $line->get_outdent_long_lines();
my $maximum_field_index = $line->get_jmax();
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
# add any extra spaces
if ( $leading_space_count > $group_leader_length ) {
$leading_space_count += $min_ci_gap;
}
my $str = $$rfields[0];
# loop to concatenate all fields of this line and needed padding
my $total_pad_count = 0;
my ( $j, $pad );
for $j ( 1 .. $maximum_field_index ) {
# skip zero-length side comments
last
if ( ( $j == $maximum_field_index )
&& ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
);
# compute spaces of padding before this field
my $col = $line->get_column( $j - 1 );
$pad = $col - ( length($str) + $leading_space_count );
if ($do_not_align) {
$pad =
( $j < $maximum_field_index )
? 0
: $rOpts_minimum_space_to_comment - 1;
}
# if the -fpsc flag is set, move the side comment to the selected
# column if and only if it is possible, ignoring constraints on
# line length and minimum space to comment
if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
{
my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
if ( $newpad >= 0 ) { $pad = $newpad; }
}
# accumulate the padding
if ( $pad > 0 ) { $total_pad_count += $pad; }
# add this field
if ( !defined $$rfields[$j] ) {
write_diagnostics("UNDEFined field at j=$j\n");
}
# only add padding when we have a finite field;
# this avoids extra terminal spaces if we have empty fields
if ( length( $$rfields[$j] ) > 0 ) {
$str .= ' ' x $total_pad_count;
$total_pad_count = 0;
$str .= $$rfields[$j];
}
else {
$total_pad_count = 0;
}
# update side comment history buffer
if ( $j == $maximum_field_index ) {
my $lineno = $file_writer_object->get_output_line_number();
shift @side_comment_history;
push @side_comment_history, [ $lineno, $col ];
}
}
my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
# ship this line off
write_leader_and_string( $leading_space_count + $extra_leading_spaces,
$str, $side_comment_length, $outdent_long_lines,
$rvertical_tightness_flags );
}
sub get_extra_leading_spaces {
#----------------------------------------------------------
# Define any extra indentation space (for the -lp option).
# Here is why:
# If a list has side comments, sub scan_list must dump the
# list before it sees everything. When this happens, it sets
# the indentation to the standard scheme, but notes how
# many spaces it would have liked to use. We may be able
# to recover that space here in the event that that all of the
# lines of a list are back together again.
#----------------------------------------------------------
my $extra_leading_spaces = 0;
if ($extra_indent_ok) {
my $object = $group_lines[0]->get_indentation();
if ( ref($object) ) {
my $extra_indentation_spaces_wanted =
get_RECOVERABLE_SPACES($object);
# all indentation objects must be the same
my $i;
for $i ( 1 .. $maximum_line_index ) {
if ( $object != $group_lines[$i]->get_indentation() ) {
$extra_indentation_spaces_wanted = 0;
last;
}
}
if ($extra_indentation_spaces_wanted) {
# the maximum space without exceeding the line length:
my $avail = $group_lines[0]->get_available_space_on_right();
$extra_leading_spaces =
( $avail > $extra_indentation_spaces_wanted )
? $extra_indentation_spaces_wanted
: $avail;
# update the indentation object because with -icp the terminal
# ');' will use the same adjustment.
$object->permanently_decrease_AVAILABLE_SPACES(
-$extra_leading_spaces );
}
}
}
return $extra_leading_spaces;
}
sub combine_fields {
# combine all fields except for the comment field ( sidecmt.t )
# Uses global variables:
# @group_lines
# $maximum_line_index
my ( $j, $k );
my $maximum_field_index = $group_lines[0]->get_jmax();
for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
my $line = $group_lines[$j];
my $rfields = $line->get_rfields();
foreach ( 1 .. $maximum_field_index - 1 ) {
$$rfields[0] .= $$rfields[$_];
}
$$rfields[1] = $$rfields[$maximum_field_index];
$line->set_jmax(1);
$line->set_column( 0, 0 );
$line->set_column( 1, 0 );
}
$maximum_field_index = 1;
for $j ( 0 .. $maximum_line_index ) {
my $line = $group_lines[$j];
my $rfields = $line->get_rfields();
for $k ( 0 .. $maximum_field_index ) {
my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
if ( $k == 0 ) {
$pad += $group_lines[$j]->get_leading_space_count();
}
if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
}
}
}
sub get_output_line_number {
# the output line number reported to a caller is the number of items
# written plus the number of items in the buffer
my $self = shift;
1 + $maximum_line_index + $file_writer_object->get_output_line_number();
}
sub write_leader_and_string {
my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
$rvertical_tightness_flags )
= @_;
# handle outdenting of long lines:
if ($outdent_long_lines) {
my $excess =
length($str) -
$side_comment_length +
$leading_space_count -
$rOpts_maximum_line_length;
if ( $excess > 0 ) {
$leading_space_count = 0;
$last_outdented_line_at =
$file_writer_object->get_output_line_number();
unless ($outdented_line_count) {
$first_outdented_line_at = $last_outdented_line_at;
}
$outdented_line_count++;
}
}
# Make preliminary leading whitespace. It could get changed
# later by entabbing, so we have to keep track of any changes
# to the leading_space_count from here on.
my $leading_string =
$leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
# Unpack any recombination data; it was packed by
# sub send_lines_to_vertical_aligner. Contents:
#
# [0] type: 1=opening 2=closing 3=opening block brace
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
# if closing: spaces of padding to use
# [2] sequence number of container
# [3] valid flag: do not append if this flag is false
#
my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
$seqno_end );
if ($rvertical_tightness_flags) {
(
$open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
$seqno_end
) = @{$rvertical_tightness_flags};
}
$seqno_string = $seqno_end;
# handle any cached line ..
# either append this line to it or write it out
if ( length($cached_line_text) ) {
if ( !$cached_line_valid ) {
entab_and_output( $cached_line_text,
$cached_line_leading_space_count,
$last_group_level_written );
}
# handle cached line with opening container token
elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
my $gap = $leading_space_count - length($cached_line_text);
# handle option of just one tight opening per line:
if ( $cached_line_flag == 1 ) {
if ( defined($open_or_close) && $open_or_close == 1 ) {
$gap = -1;
}
}
if ( $gap >= 0 ) {
$leading_string = $cached_line_text . ' ' x $gap;
$leading_space_count = $cached_line_leading_space_count;
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
}
else {
entab_and_output( $cached_line_text,
$cached_line_leading_space_count,
$last_group_level_written );
}
}
# handle cached line to place before this closing container token
else {
my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
if ( length($test_line) <= $rOpts_maximum_line_length ) {
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
# Patch to outdent closing tokens ending # in ');'
# If we are joining a line like ');' to a previous stacked
# set of closing tokens, then decide if we may outdent the
# combined stack to the indentation of the ');'. Since we
# should not normally outdent any of the other tokens more than
# the indentation of the lines that contained them, we will
# only do this if all of the corresponding opening
# tokens were on the same line. This can happen with
# -sot and -sct. For example, it is ok here:
# __PACKAGE__->load_components( qw(
# PK::Auto
# Core
# ));
#
# But, for example, we do not outdent in this example because
# that would put the closing sub brace out farther than the
# opening sub brace:
#
# perltidy -sot -sct
# $c->Tk::bind(
# '<Control-f>' => sub {
# my ($c) = @_;
# my $e = $c->XEvent;
# itemsUnderArea $c;
# } );
#
if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
# The way to tell this is if the stacked sequence numbers
# of this output line are the reverse of the stacked
# sequence numbers of the previous non-blank line of
# sequence numbers. So we can join if the previous
# nonblank string of tokens is the mirror image. For
# example if stack )}] is 13:8:6 then we are looking for a
# leading stack like [{( which is 6:8:13 We only need to
# check the two ends, because the intermediate tokens must
# fall in order. Note on speed: having to split on colons
# and eliminate multiple colons might appear to be slow,
# but it's not an issue because we almost never come
# through here. In a typical file we don't.
$seqno_string =~ s/^:+//;
$last_nonblank_seqno_string =~ s/^:+//;
$seqno_string =~ s/:+/:/g;
$last_nonblank_seqno_string =~ s/:+/:/g;
# how many spaces can we outdent?
my $diff =
$cached_line_leading_space_count - $leading_space_count;
if ( $diff > 0
&& length($seqno_string)
&& length($last_nonblank_seqno_string) ==
length($seqno_string) )
{
my @seqno_last =
( split ':', $last_nonblank_seqno_string );
my @seqno_now = ( split ':', $seqno_string );
if ( $seqno_now[-1] == $seqno_last[0]
&& $seqno_now[0] == $seqno_last[-1] )
{
# OK to outdent ..
# for absolute safety, be sure we only remove
# whitespace
my $ws = substr( $test_line, 0, $diff );
if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
$test_line = substr( $test_line, $diff );
$cached_line_leading_space_count -= $diff;
}
# shouldn't happen, but not critical:
##else {
## ERROR transferring indentation here
##}
}
}
}
$str = $test_line;
$leading_string = "";
$leading_space_count = $cached_line_leading_space_count;
}
else {
entab_and_output( $cached_line_text,
$cached_line_leading_space_count,
$last_group_level_written );
}
}
}
$cached_line_type = 0;
$cached_line_text = "";
# make the line to be written
my $line = $leading_string . $str;
# write or cache this line
if ( !$open_or_close || $side_comment_length > 0 ) {
entab_and_output( $line, $leading_space_count, $group_level );
}
else {
$cached_line_text = $line;
$cached_line_type = $open_or_close;
$cached_line_flag = $tightness_flag;
$cached_seqno = $seqno;
$cached_line_valid = $valid;
$cached_line_leading_space_count = $leading_space_count;
$cached_seqno_string = $seqno_string;
}
$last_group_level_written = $group_level;
$last_side_comment_length = $side_comment_length;
$extra_indent_ok = 0;
}
sub entab_and_output {
my ( $line, $leading_space_count, $level ) = @_;
# The line is currently correct if there is no tabbing (recommended!)
# We may have to lop off some leading spaces and replace with tabs.
if ( $leading_space_count > 0 ) {
# Nothing to do if no tabs
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
# nothing to do
}
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
my $space_count =
$leading_space_count % $rOpts_entab_leading_whitespace;
my $tab_count =
int( $leading_space_count / $rOpts_entab_leading_whitespace );
my $leading_string = "\t" x $tab_count . ' ' x $space_count;
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
}
else {
# REMOVE AFTER TESTING
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
);
}
}
# Handle option of one tab per level
else {
my $leading_string = ( "\t" x $level );
my $space_count =
$leading_space_count - $level * $rOpts_indent_columns;
# shouldn't happen:
if ( $space_count < 0 ) {
warning(
"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
);
$leading_string = ( ' ' x $leading_space_count );
}
else {
$leading_string .= ( ' ' x $space_count );
}
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
substr( $line, 0, $leading_space_count ) = $leading_string;
}
else {
# REMOVE AFTER TESTING
# shouldn't happen - program error counting whitespace
# we'll skip entabbing
warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
);
}
}
}
$file_writer_object->write_code_line( $line . "\n" );
if ($seqno_string) {
$last_nonblank_seqno_string = $seqno_string;
}
}
{ # begin get_leading_string
my @leading_string_cache;
sub get_leading_string {
# define the leading whitespace string for this line..
my $leading_whitespace_count = shift;
# Handle case of zero whitespace, which includes multi-line quotes
# (which may have a finite level; this prevents tab problems)
if ( $leading_whitespace_count <= 0 ) {
return "";
}
# look for previous result
elsif ( $leading_string_cache[$leading_whitespace_count] ) {
return $leading_string_cache[$leading_whitespace_count];
}
# must compute a string for this number of spaces
my $leading_string;
# Handle simple case of no tabs
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
|| $rOpts_indent_columns <= 0 )
{
$leading_string = ( ' ' x $leading_whitespace_count );
}
# Handle entab option
elsif ($rOpts_entab_leading_whitespace) {
my $space_count =
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
my $tab_count = int(
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
$leading_string = "\t" x $tab_count . ' ' x $space_count;
}
# Handle option of one tab per level
else {
$leading_string = ( "\t" x $group_level );
my $space_count =
$leading_whitespace_count - $group_level * $rOpts_indent_columns;
# shouldn't happen:
if ( $space_count < 0 ) {
warning(
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
);
$leading_string = ( ' ' x $leading_whitespace_count );
}
else {
$leading_string .= ( ' ' x $space_count );
}
}
$leading_string_cache[$leading_whitespace_count] = $leading_string;
return $leading_string;
}
} # end get_leading_string
sub report_anything_unusual {
my $self = shift;
if ( $outdented_line_count > 0 ) {
write_logfile_entry(
"$outdented_line_count long lines were outdented:\n");
write_logfile_entry(
" First at output line $first_outdented_line_at\n");
if ( $outdented_line_count > 1 ) {
write_logfile_entry(
" Last at output line $last_outdented_line_at\n");
}
write_logfile_entry(
" use -noll to prevent outdenting, -l=n to increase line length\n"
);
write_logfile_entry("\n");
}
}
#####################################################################
#
# the Perl::Tidy::FileWriter class writes the output file
#
#####################################################################
package Perl::Tidy::FileWriter;
# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;
sub write_logfile_entry {
my $self = shift;
my $logger_object = $self->{_logger_object};
if ($logger_object) {
$logger_object->write_logfile_entry(@_);
}
}
sub new {
my $class = shift;
my ( $line_sink_object, $rOpts, $logger_object ) = @_;
bless {
_line_sink_object => $line_sink_object,
_logger_object => $logger_object,
_rOpts => $rOpts,
_output_line_number => 1,
_consecutive_blank_lines => 0,
_consecutive_nonblank_lines => 0,
_first_line_length_error => 0,
_max_line_length_error => 0,
_last_line_length_error => 0,
_first_line_length_error_at => 0,
_max_line_length_error_at => 0,
_last_line_length_error_at => 0,
_line_length_error_count => 0,
_max_output_line_length => 0,
_max_output_line_length_at => 0,
}, $class;
}
sub tee_on {
my $self = shift;
$self->{_line_sink_object}->tee_on();
}
sub tee_off {
my $self = shift;
$self->{_line_sink_object}->tee_off();
}
sub get_output_line_number {
my $self = shift;
return $self->{_output_line_number};
}
sub decrement_output_line_number {
my $self = shift;
$self->{_output_line_number}--;
}
sub get_consecutive_nonblank_lines {
my $self = shift;
return $self->{_consecutive_nonblank_lines};
}
sub reset_consecutive_blank_lines {
my $self = shift;
$self->{_consecutive_blank_lines} = 0;
}
sub want_blank_line {
my $self = shift;
unless ( $self->{_consecutive_blank_lines} ) {
$self->write_blank_code_line();
}
}
sub write_blank_code_line {
my $self = shift;
my $rOpts = $self->{_rOpts};
return
if ( $self->{_consecutive_blank_lines} >=
$rOpts->{'maximum-consecutive-blank-lines'} );
$self->{_consecutive_blank_lines}++;
$self->{_consecutive_nonblank_lines} = 0;
$self->write_line("\n");
}
sub write_code_line {
my $self = shift;
my $a = shift;
if ( $a =~ /^\s*$/ ) {
my $rOpts = $self->{_rOpts};
return
if ( $self->{_consecutive_blank_lines} >=
$rOpts->{'maximum-consecutive-blank-lines'} );
$self->{_consecutive_blank_lines}++;
$self->{_consecutive_nonblank_lines} = 0;
}
else {
$self->{_consecutive_blank_lines} = 0;
$self->{_consecutive_nonblank_lines}++;
}
$self->write_line($a);
}
sub write_line {
my $self = shift;
my $a = shift;
# TODO: go through and see if the test is necessary here
if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
$self->{_line_sink_object}->write_line($a);
# This calculation of excess line length ignores any internal tabs
my $rOpts = $self->{_rOpts};
my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
if ( $a =~ /^\t+/g ) {
$exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
}
# Note that we just incremented output line number to future value
# so we must subtract 1 for current line number
if ( length($a) > 1 + $self->{_max_output_line_length} ) {
$self->{_max_output_line_length} = length($a) - 1;
$self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
}
if ( $exceed > 0 ) {
my $output_line_number = $self->{_output_line_number};
$self->{_last_line_length_error} = $exceed;
$self->{_last_line_length_error_at} = $output_line_number - 1;
if ( $self->{_line_length_error_count} == 0 ) {
$self->{_first_line_length_error} = $exceed;
$self->{_first_line_length_error_at} = $output_line_number - 1;
}
if (
$self->{_last_line_length_error} > $self->{_max_line_length_error} )
{
$self->{_max_line_length_error} = $exceed;
$self->{_max_line_length_error_at} = $output_line_number - 1;
}
if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
$self->write_logfile_entry(
"Line length exceeded by $exceed characters\n");
}
$self->{_line_length_error_count}++;
}
}
sub report_line_length_errors {
my $self = shift;
my $rOpts = $self->{_rOpts};
my $line_length_error_count = $self->{_line_length_error_count};
if ( $line_length_error_count == 0 ) {
$self->write_logfile_entry(
"No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
my $max_output_line_length = $self->{_max_output_line_length};
my $max_output_line_length_at = $self->{_max_output_line_length_at};
$self->write_logfile_entry(
" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
);
}
else {
my $word = ( $line_length_error_count > 1 ) ? "s" : "";
$self->write_logfile_entry(
"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
);
$word = ( $line_length_error_count > 1 ) ? "First" : "";
my $first_line_length_error = $self->{_first_line_length_error};
my $first_line_length_error_at = $self->{_first_line_length_error_at};
$self->write_logfile_entry(
" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
);
if ( $line_length_error_count > 1 ) {
my $max_line_length_error = $self->{_max_line_length_error};
my $max_line_length_error_at = $self->{_max_line_length_error_at};
my $last_line_length_error = $self->{_last_line_length_error};
my $last_line_length_error_at = $self->{_last_line_length_error_at};
$self->write_logfile_entry(
" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
);
$self->write_logfile_entry(
" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
);
}
}
}
#####################################################################
#
# The Perl::Tidy::Debugger class shows line tokenization
#
#####################################################################
package Perl::Tidy::Debugger;
sub new {
my ( $class, $filename ) = @_;
bless {
_debug_file => $filename,
_debug_file_opened => 0,
_fh => undef,
}, $class;
}
sub really_open_debug_file {
my $self = shift;
my $debug_file = $self->{_debug_file};
my $fh;
unless ( $fh = IO::File->new("> $debug_file") ) {
warn("can't open $debug_file: $!\n");
}
$self->{_debug_file_opened} = 1;
$self->{_fh} = $fh;
print $fh
"Use -dump-token-types (-dtt) to get a list of token type codes\n";
}
sub close_debug_file {
my $self = shift;
my $fh = $self->{_fh};
if ( $self->{_debug_file_opened} ) {
eval { $self->{_fh}->close() };
}
}
sub write_debug_entry {
# This is a debug dump routine which may be modified as necessary
# to dump tokens on a line-by-line basis. The output will be written
# to the .DEBUG file when the -D flag is entered.
my $self = shift;
my $line_of_tokens = shift;
my $input_line = $line_of_tokens->{_line_text};
my $rtoken_type = $line_of_tokens->{_rtoken_type};
my $rtokens = $line_of_tokens->{_rtokens};
my $rlevels = $line_of_tokens->{_rlevels};
my $rslevels = $line_of_tokens->{_rslevels};
my $rblock_type = $line_of_tokens->{_rblock_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $line_type = $line_of_tokens->{_line_type};
my ( $j, $num );
my $token_str = "$input_line_number: ";
my $reconstructed_original = "$input_line_number: ";
my $block_str = "$input_line_number: ";
#$token_str .= "$line_type: ";
#$reconstructed_original .= "$line_type: ";
my $pattern = "";
my @next_char = ( '"', '"' );
my $i_next = 0;
unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
my $fh = $self->{_fh};
for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
# testing patterns
if ( $$rtoken_type[$j] eq 'k' ) {
$pattern .= $$rtokens[$j];
}
else {
$pattern .= $$rtoken_type[$j];
}
$reconstructed_original .= $$rtokens[$j];
$block_str .= "($$rblock_type[$j])";
$num = length( $$rtokens[$j] );
my $type_str = $$rtoken_type[$j];
# be sure there are no blank tokens (shouldn't happen)
if ( $type_str eq ' ' ) {
print $fh "BLANK TOKEN on the next line\n";
$type_str = $next_char[$i_next];
$i_next = 1 - $i_next;
}
if ( length($type_str) == 1 ) {
$type_str = $type_str x $num;
}
$token_str .= $type_str;
}
print $fh "$reconstructed_original\n";
print $fh "$token_str\n";
}
package Perl::Tidy::LineBuffer;
sub new {
my $class = shift;
my $line_source_object = shift;
return bless {
_line_source_object => $line_source_object,
_rlookahead_buffer => [],
}, $class;
}
sub peek_ahead {
my $self = shift;
my $buffer_index = shift;
my $line = undef;
my $line_source_object = $self->{_line_source_object};
my $rlookahead_buffer = $self->{_rlookahead_buffer};
if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
$line = $$rlookahead_buffer[$buffer_index];
}
else {
$line = $line_source_object->get_line();
push( @$rlookahead_buffer, $line );
}
return $line;
}
sub get_line {
my $self = shift;
my $line = undef;
my $line_source_object = $self->{_line_source_object};
my $rlookahead_buffer = $self->{_rlookahead_buffer};
if ( scalar(@$rlookahead_buffer) ) {
$line = shift @$rlookahead_buffer;
}
else {
$line = $line_source_object->get_line();
}
return $line;
}
package Perl::Tidy::Tokenizer;
BEGIN {
use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
my $debug_warning = sub {
print "TOKENIZER_DEBUGGING with key $_[0]\n";
};
TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
}
use Carp;
use vars qw{
$tokenizer_self
$last_nonblank_token
$last_nonblank_type
$last_nonblank_block_type
$statement_type
$in_attribute_list
$current_package
$context
%is_constant
%is_user_function
%user_function_prototype
%is_block_function
%is_block_list_function
%saw_function_definition
$brace_depth
$paren_depth
$square_bracket_depth
@current_depth
@total_depth
$total_depth
@nesting_sequence_number
@current_sequence_number
@paren_type
@paren_semicolon_count
@paren_structural_type
@brace_type
@brace_structural_type
@brace_statement_type
@brace_context
@brace_package
@square_bracket_type
@square_bracket_structural_type
@depth_array
@nested_ternary_flag
@starting_line_of_current_depth
};
use vars qw{
%is_indirect_object_taker
%is_block_operator
%expecting_operator_token
%expecting_operator_types
%expecting_term_types
%expecting_term_token
%is_digraph
%is_file_test_operator
%is_trigraph
%is_valid_token_type
%is_keyword
%is_code_block_token
%really_want_term
@opening_brace_names
@closing_brace_names
%is_keyword_taking_list
%is_q_qq_qw_qx_qr_s_y_tr_m
};
use constant TERM => -1;
use constant UNKNOWN => 0;
use constant OPERATOR => 1;
use constant SCALAR_CONTEXT => -1;
use constant UNKNOWN_CONTEXT => 0;
use constant LIST_CONTEXT => 1;
use constant MAX_NAG_MESSAGES => 6;
{
my $_count = 0;
sub get_count { $_count; }
sub _increment_count { ++$_count }
sub _decrement_count { --$_count }
}
sub DESTROY {
$_[0]->_decrement_count();
}
sub new {
my $class = shift;
my %defaults = (
source_object => undef,
debugger_object => undef,
diagnostics_object => undef,
logger_object => undef,
starting_level => undef,
indent_columns => 4,
tabs => 0,
look_for_hash_bang => 0,
trim_qw => 1,
look_for_autoloader => 1,
look_for_selfloader => 1,
starting_line_number => 1,
);
my %args = ( %defaults, @_ );
my $source_object = $args{source_object};
my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
$tokenizer_self = {
_rhere_target_list => [],
_in_here_doc => 0,
_here_doc_target => "",
_here_quote_character => "",
_in_data => 0,
_in_end => 0,
_in_format => 0,
_in_error => 0,
_in_pod => 0,
_in_attribute_list => 0,
_in_quote => 0,
_quote_target => "",
_line_start_quote => -1,
_starting_level => $args{starting_level},
_know_starting_level => defined( $args{starting_level} ),
_tabs => $args{tabs},
_indent_columns => $args{indent_columns},
_look_for_hash_bang => $args{look_for_hash_bang},
_trim_qw => $args{trim_qw},
_input_tabstr => "",
_know_input_tabstr => -1,
_last_line_number => $args{starting_line_number} - 1,
_saw_perl_dash_P => 0,
_saw_perl_dash_w => 0,
_saw_use_strict => 0,
_saw_v_string => 0,
_look_for_autoloader => $args{look_for_autoloader},
_look_for_selfloader => $args{look_for_selfloader},
_saw_autoloader => 0,
_saw_selfloader => 0,
_saw_hash_bang => 0,
_saw_end => 0,
_saw_data => 0,
_saw_negative_indentation => 0,
_started_tokenizing => 0,
_line_buffer_object => $line_buffer_object,
_debugger_object => $args{debugger_object},
_diagnostics_object => $args{diagnostics_object},
_logger_object => $args{logger_object},
_unexpected_error_count => 0,
_started_looking_for_here_target_at => 0,
_nearly_matched_here_target_at => undef,
_line_text => "",
_rlower_case_labels_at => undef,
};
prepare_for_a_new_file();
find_starting_indentation_level();
bless $tokenizer_self, $class;
if ( _increment_count() > 1 ) {
confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
}
return $tokenizer_self;
}
sub warning {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->warning(@_);
}
}
sub complain {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->complain(@_);
}
}
sub write_logfile_entry {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->write_logfile_entry(@_);
}
}
sub interrupt_logfile {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->interrupt_logfile();
}
}
sub resume_logfile {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->resume_logfile();
}
}
sub increment_brace_error {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->increment_brace_error();
}
}
sub report_definite_bug {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->report_definite_bug();
}
}
sub brace_warning {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->brace_warning(@_);
}
}
sub get_saw_brace_error {
my $logger_object = $tokenizer_self->{_logger_object};
if ($logger_object) {
$logger_object->get_saw_brace_error();
}
else {
0;
}
}
sub write_diagnostics {
if ( $tokenizer_self->{_diagnostics_object} ) {
$tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
}
}
sub report_tokenization_errors {
my $self = shift;
my $level = get_indentation_level();
if ( $level != $tokenizer_self->{_starting_level} ) {
warning("final indentation level: $level\n");
}
check_final_nesting_depths();
if ( $tokenizer_self->{_look_for_hash_bang}
&& !$tokenizer_self->{_saw_hash_bang} )
{
warning(
"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
}
if ( $tokenizer_self->{_in_format} ) {
warning("hit EOF while in format description\n");
}
if ( $tokenizer_self->{_in_pod} ) {
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
write_logfile_entry(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
);
}
else {
complain(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
);
}
}
if ( $tokenizer_self->{_in_here_doc} ) {
my $here_doc_target = $tokenizer_self->{_here_doc_target};
my $started_looking_for_here_target_at =
$tokenizer_self->{_started_looking_for_here_target_at};
if ($here_doc_target) {
warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
);
}
else {
warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
);
}
my $nearly_matched_here_target_at =
$tokenizer_self->{_nearly_matched_here_target_at};
if ($nearly_matched_here_target_at) {
warning(
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
);
}
}
if ( $tokenizer_self->{_in_quote} ) {
my $line_start_quote = $tokenizer_self->{_line_start_quote};
my $quote_target = $tokenizer_self->{_quote_target};
my $what =
( $tokenizer_self->{_in_attribute_list} )
? "attribute list"
: "quote/pattern";
warning(
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
);
}
unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
if ( $] < 5.006 ) {
write_logfile_entry("Suggest including '-w parameter'\n");
}
else {
write_logfile_entry("Suggest including 'use warnings;'\n");
}
}
if ( $tokenizer_self->{_saw_perl_dash_P} ) {
write_logfile_entry("Use of -P parameter for defines is discouraged\n");
}
unless ( $tokenizer_self->{_saw_use_strict} ) {
write_logfile_entry("Suggest including 'use strict;'\n");
}
if ( $tokenizer_self->{_rlower_case_labels_at} ) {
my @lower_case_labels_at =
@{ $tokenizer_self->{_rlower_case_labels_at} };
write_logfile_entry(
"Suggest using upper case characters in label(s)\n");
local $" = ')(';
write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
}
}
sub report_v_string {
# warn if this version can't handle v-strings
my $tok = shift;
unless ( $tokenizer_self->{_saw_v_string} ) {
$tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
}
if ( $] < 5.006 ) {
warning(
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
);
}
}
sub get_input_line_number {
return $tokenizer_self->{_last_line_number};
}
# returns the next tokenized line
sub get_line {
my $self = shift;
# USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
# $square_bracket_depth, $paren_depth
my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
$tokenizer_self->{_line_text} = $input_line;
return undef unless ($input_line);
my $input_line_number = ++$tokenizer_self->{_last_line_number};
# Find and remove what characters terminate this line, including any
# control r
my $input_line_separator = "";
if ( chomp($input_line) ) { $input_line_separator = $/ }
# TODO: what other characters should be included here?
if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
$input_line_separator = $2 . $input_line_separator;
}
# for backwards compatability we keep the line text terminated with
# a newline character
$input_line .= "\n";
$tokenizer_self->{_line_text} = $input_line; # update
# create a data structure describing this line which will be
# returned to the caller.
# _line_type codes are:
# SYSTEM - system-specific code before hash-bang line
# CODE - line of perl code (including comments)
# POD_START - line starting pod, such as '=head'
# POD - pod documentation text
# POD_END - last line of pod section, '=cut'
# HERE - text of here-document
# HERE_END - last line of here-doc (target word)
# FORMAT - format section
# FORMAT_END - last line of format section, '.'
# DATA_START - __DATA__ line
# DATA - unidentified text following __DATA__
# END_START - __END__ line
# END - unidentified text following __END__
# ERROR - we are in big trouble, probably not a perl script
# Other variables:
# _curly_brace_depth - depth of curly braces at start of line
# _square_bracket_depth - depth of square brackets at start of line
# _paren_depth - depth of parens at start of line
# _starting_in_quote - this line continues a multi-line quote
# (so don't trim leading blanks!)
# _ending_in_quote - this line ends in a multi-line quote
# (so don't trim trailing blanks!)
my $line_of_tokens = {
_line_type => 'EOF',
_line_text => $input_line,
_line_number => $input_line_number,
_rtoken_type => undef,
_rtokens => undef,
_rlevels => undef,
_rslevels => undef,
_rblock_type => undef,
_rcontainer_type => undef,
_rcontainer_environment => undef,
_rtype_sequence => undef,
_rnesting_tokens => undef,
_rci_levels => undef,
_rnesting_blocks => undef,
_python_indentation_level => -1, ## 0,
_starting_in_quote => 0, # to be set by subroutine
_ending_in_quote => 0,
_curly_brace_depth => $brace_depth,
_square_bracket_depth => $square_bracket_depth,
_paren_depth => $paren_depth,
_quote_character => '',
};
# must print line unchanged if we are in a here document
if ( $tokenizer_self->{_in_here_doc} ) {
$line_of_tokens->{_line_type} = 'HERE';
my $here_doc_target = $tokenizer_self->{_here_doc_target};
my $here_quote_character = $tokenizer_self->{_here_quote_character};
my $candidate_target = $input_line;
chomp $candidate_target;
if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->{_nearly_matched_here_target_at} = undef;
$line_of_tokens->{_line_type} = 'HERE_END';
write_logfile_entry("Exiting HERE document $here_doc_target\n");
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
if (@$rhere_target_list) { # there can be multiple here targets
( $here_doc_target, $here_quote_character ) =
@{ shift @$rhere_target_list };
$tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} =
$here_quote_character;
write_logfile_entry(
"Entering HERE document $here_doc_target\n");
$tokenizer_self->{_nearly_matched_here_target_at} = undef;
$tokenizer_self->{_started_looking_for_here_target_at} =
$input_line_number;
}
else {
$tokenizer_self->{_in_here_doc} = 0;
$tokenizer_self->{_here_doc_target} = "";
$tokenizer_self->{_here_quote_character} = "";
}
}
# check for error of extra whitespace
# note for PERL6: leading whitespace is allowed
else {
$candidate_target =~ s/\s*$//;
$candidate_target =~ s/^\s*//;
if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->{_nearly_matched_here_target_at} =
$input_line_number;
}
}
return $line_of_tokens;
}
# must print line unchanged if we are in a format section
elsif ( $tokenizer_self->{_in_format} ) {
if ( $input_line =~ /^\.[\s#]*$/ ) {
write_logfile_entry("Exiting format section\n");
$tokenizer_self->{_in_format} = 0;
$line_of_tokens->{_line_type} = 'FORMAT_END';
}
else {
$line_of_tokens->{_line_type} = 'FORMAT';
}
return $line_of_tokens;
}
# must print line unchanged if we are in pod documentation
elsif ( $tokenizer_self->{_in_pod} ) {
$line_of_tokens->{_line_type} = 'POD';
if ( $input_line =~ /^=cut/ ) {
$line_of_tokens->{_line_type} = 'POD_END';
write_logfile_entry("Exiting POD section\n");
$tokenizer_self->{_in_pod} = 0;
}
if ( $input_line =~ /^\#\!.*perl\b/ ) {
warning(
"Hash-bang in pod can cause older versions of perl to fail! \n"
);
}
return $line_of_tokens;
}
# must print line unchanged if we have seen a severe error (i.e., we
# are seeing illegal tokens and connot continue. Syntax errors do
# not pass this route). Calling routine can decide what to do, but
# the default can be to just pass all lines as if they were after __END__
elsif ( $tokenizer_self->{_in_error} ) {
$line_of_tokens->{_line_type} = 'ERROR';
return $line_of_tokens;
}
# print line unchanged if we are __DATA__ section
elsif ( $tokenizer_self->{_in_data} ) {
# ...but look for POD
# Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the
# end of a pod section
if ( $input_line =~ /^=(?!cut)/ ) {
$line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n");
$tokenizer_self->{_in_pod} = 1;
return $line_of_tokens;
}
else {
$line_of_tokens->{_line_type} = 'DATA';
return $line_of_tokens;
}
}
# print line unchanged if we are in __END__ section
elsif ( $tokenizer_self->{_in_end} ) {
# ...but look for POD
# Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the
# end of a pod section
if ( $input_line =~ /^=(?!cut)/ ) {
$line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n");
$tokenizer_self->{_in_pod} = 1;
return $line_of_tokens;
}
else {
$line_of_tokens->{_line_type} = 'END';
return $line_of_tokens;
}
}
# check for a hash-bang line if we haven't seen one
if ( !$tokenizer_self->{_saw_hash_bang} ) {
if ( $input_line =~ /^\#\!.*perl\b/ ) {
$tokenizer_self->{_saw_hash_bang} = $input_line_number;
# check for -w and -P flags
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
$tokenizer_self->{_saw_perl_dash_P} = 1;
}
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
if ( ( $input_line_number > 1 )
&& ( !$tokenizer_self->{_look_for_hash_bang} ) )
{
# this is helpful for VMS systems; we may have accidentally
# tokenized some DCL commands
if ( $tokenizer_self->{_started_tokenizing} ) {
warning(
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
);
}
else {
complain("Useless hash-bang after line 1\n");
}
}
# Report the leading hash-bang as a system line
# This will prevent -dac from deleting it
else {
$line_of_tokens->{_line_type} = 'SYSTEM';
return $line_of_tokens;
}
}
}
# wait for a hash-bang before parsing if the user invoked us with -x
if ( $tokenizer_self->{_look_for_hash_bang}
&& !$tokenizer_self->{_saw_hash_bang} )
{
$line_of_tokens->{_line_type} = 'SYSTEM';
return $line_of_tokens;
}
# a first line of the form ': #' will be marked as SYSTEM
# since lines of this form may be used by tcsh
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
$line_of_tokens->{_line_type} = 'SYSTEM';
return $line_of_tokens;
}
# now we know that it is ok to tokenize the line...
# the line tokenizer will modify any of these private variables:
# _rhere_target_list
# _in_data
# _in_end
# _in_format
# _in_error
# _in_pod
# _in_quote
my $ending_in_quote_last = $tokenizer_self->{_in_quote};
tokenize_this_line($line_of_tokens);
# Now finish defining the return structure and return it
$line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
# handle severe error (binary data in script)
if ( $tokenizer_self->{_in_error} ) {
$tokenizer_self->{_in_quote} = 0; # to avoid any more messages
warning("Giving up after error\n");
$line_of_tokens->{_line_type} = 'ERROR';
reset_indentation_level(0); # avoid error messages
return $line_of_tokens;
}
# handle start of pod documentation
if ( $tokenizer_self->{_in_pod} ) {
# This gets tricky..above a __DATA__ or __END__ section, perl
# accepts '=cut' as the start of pod section. But afterwards,
# only pod utilities see it and they may ignore an =cut without
# leading =head. In any case, this isn't good.
if ( $input_line =~ /^=cut\b/ ) {
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
complain("=cut while not in pod ignored\n");
$tokenizer_self->{_in_pod} = 0;
$line_of_tokens->{_line_type} = 'POD_END';
}
else {
$line_of_tokens->{_line_type} = 'POD_START';
complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
);
write_logfile_entry("Entering POD section\n");
}
}
else {
$line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n");
}
return $line_of_tokens;
}
# update indentation levels for log messages
if ( $input_line !~ /^\s*$/ ) {
my $rlevels = $line_of_tokens->{_rlevels};
my $structural_indentation_level = $$rlevels[0];
my ( $python_indentation_level, $msg ) =
find_indentation_level( $input_line, $structural_indentation_level );
if ($msg) { write_logfile_entry("$msg") }
if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
$line_of_tokens->{_python_indentation_level} =
$python_indentation_level;
}
}
# see if this line contains here doc targets
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
if (@$rhere_target_list) {
my ( $here_doc_target, $here_quote_character ) =
@{ shift @$rhere_target_list };
$tokenizer_self->{_in_here_doc} = 1;
$tokenizer_self->{_here_doc_target} = $here_doc_target;
$tokenizer_self->{_here_quote_character} = $here_quote_character;
write_logfile_entry("Entering HERE document $here_doc_target\n");
$tokenizer_self->{_started_looking_for_here_target_at} =
$input_line_number;
}
# NOTE: __END__ and __DATA__ statements are written unformatted
# because they can theoretically contain additional characters
# which are not tokenized (and cannot be read with <DATA> either!).
if ( $tokenizer_self->{_in_data} ) {
$line_of_tokens->{_line_type} = 'DATA_START';
write_logfile_entry("Starting __DATA__ section\n");
$tokenizer_self->{_saw_data} = 1;
# keep parsing after __DATA__ if use SelfLoader was seen
if ( $tokenizer_self->{_saw_selfloader} ) {
$tokenizer_self->{_in_data} = 0;
write_logfile_entry(
"SelfLoader seen, continuing; -nlsl deactivates\n");
}
return $line_of_tokens;
}
elsif ( $tokenizer_self->{_in_end} ) {
$line_of_tokens->{_line_type} = 'END_START';
write_logfile_entry("Starting __END__ section\n");
$tokenizer_self->{_saw_end} = 1;
# keep parsing after __END__ if use AutoLoader was seen
if ( $tokenizer_self->{_saw_autoloader} ) {
$tokenizer_self->{_in_end} = 0;
write_logfile_entry(
"AutoLoader seen, continuing; -nlal deactivates\n");
}
return $line_of_tokens;
}
# now, finally, we know that this line is type 'CODE'
$line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code
if ( !$tokenizer_self->{_started_tokenizing}
&& $input_line !~ /^\s*$/
&& $input_line !~ /^\s*#/ )
{
$tokenizer_self->{_started_tokenizing} = 1;
}
if ( $tokenizer_self->{_debugger_object} ) {
$tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
}
# Note: if keyword 'format' occurs in this line code, it is still CODE
# (keyword 'format' need not start a line)
if ( $tokenizer_self->{_in_format} ) {
write_logfile_entry("Entering format section\n");
}
if ( $tokenizer_self->{_in_quote}
and ( $tokenizer_self->{_line_start_quote} < 0 ) )
{
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
if (
( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
{
$tokenizer_self->{_line_start_quote} = $input_line_number;
write_logfile_entry(
"Start multi-line quote or pattern ending in $quote_target\n");
}
}
elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
and !$tokenizer_self->{_in_quote} )
{
$tokenizer_self->{_line_start_quote} = -1;
write_logfile_entry("End of multi-line quote or pattern\n");
}
# we are returning a line of CODE
return $line_of_tokens;
}
sub find_starting_indentation_level {
# USES GLOBAL VARIABLES: $tokenizer_self
my $starting_level = 0;
my $know_input_tabstr = -1; # flag for find_indentation_level
# use value if given as parameter
if ( $tokenizer_self->{_know_starting_level} ) {
$starting_level = $tokenizer_self->{_starting_level};
}
# if we know there is a hash_bang line, the level must be zero
elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
$tokenizer_self->{_know_starting_level} = 1;
}
# otherwise figure it out from the input file
else {
my $line;
my $i = 0;
my $structural_indentation_level = -1; # flag for find_indentation_level
my $msg = "";
while ( $line =
$tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
{
# if first line is #! then assume starting level is zero
if ( $i == 1 && $line =~ /^\#\!/ ) {
$starting_level = 0;
last;
}
next if ( $line =~ /^\s*#/ ); # must not be comment
next if ( $line =~ /^\s*$/ ); # must not be blank
( $starting_level, $msg ) =
find_indentation_level( $line, $structural_indentation_level );
if ($msg) { write_logfile_entry("$msg") }
last;
}
$msg = "Line $i implies starting-indentation-level = $starting_level\n";
if ( $starting_level > 0 ) {
my $input_tabstr = $tokenizer_self->{_input_tabstr};
if ( $input_tabstr eq "\t" ) {
$msg .= "by guessing input tabbing uses 1 tab per level\n";
}
else {
my $cols = length($input_tabstr);
$msg .=
"by guessing input tabbing uses $cols blanks per level\n";
}
}
write_logfile_entry("$msg");
}
$tokenizer_self->{_starting_level} = $starting_level;
reset_indentation_level($starting_level);
}
# Find indentation level given a input line. At the same time, try to
# figure out the input tabbing scheme.
#
# There are two types of calls:
#
# Type 1: $structural_indentation_level < 0
# In this case we have to guess $input_tabstr to figure out the level.
#
# Type 2: $structural_indentation_level >= 0
# In this case the level of this line is known, and this routine can
# update the tabbing string, if still unknown, to make the level correct.
sub find_indentation_level {
my ( $line, $structural_indentation_level ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self
my $level = 0;
my $msg = "";
my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
my $input_tabstr = $tokenizer_self->{_input_tabstr};
# find leading whitespace
my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
# make first guess at input tabbing scheme if necessary
if ( $know_input_tabstr < 0 ) {
$know_input_tabstr = 0;
if ( $tokenizer_self->{_tabs} ) {
$input_tabstr = "\t";
if ( length($leading_whitespace) > 0 ) {
if ( $leading_whitespace !~ /\t/ ) {
my $cols = $tokenizer_self->{_indent_columns};
if ( length($leading_whitespace) < $cols ) {
$cols = length($leading_whitespace);
}
$input_tabstr = " " x $cols;
}
}
}
else {
$input_tabstr = " " x $tokenizer_self->{_indent_columns};
if ( length($leading_whitespace) > 0 ) {
if ( $leading_whitespace =~ /^\t/ ) {
$input_tabstr = "\t";
}
}
}
$tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
$tokenizer_self->{_input_tabstr} = $input_tabstr;
}
# determine the input tabbing scheme if possible
if ( ( $know_input_tabstr == 0 )
&& ( length($leading_whitespace) > 0 )
&& ( $structural_indentation_level > 0 ) )
{
my $saved_input_tabstr = $input_tabstr;
# check for common case of one tab per indentation level
if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
$input_tabstr = "\t";
$msg = "Guessing old indentation was tab character\n";
}
}
else {
# detab any tabs based on 8 blanks per tab
my $entabbed = "";
if ( $leading_whitespace =~ s/^\t+/ /g ) {
$entabbed = "entabbed";
}
# now compute tabbing from number of spaces
my $columns =
length($leading_whitespace) / $structural_indentation_level;
if ( $columns == int $columns ) {
$msg =
"Guessing old indentation was $columns $entabbed spaces\n";
}
else {
$columns = int $columns;
$msg =
"old indentation is unclear, using $columns $entabbed spaces\n";
}
$input_tabstr = " " x $columns;
}
$know_input_tabstr = 1;
$tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
$tokenizer_self->{_input_tabstr} = $input_tabstr;
# see if mistakes were made
if ( ( $tokenizer_self->{_starting_level} > 0 )
&& !$tokenizer_self->{_know_starting_level} )
{
if ( $input_tabstr ne $saved_input_tabstr ) {
complain(
"I made a bad starting level guess; rerun with a value for -sil \n"
);
}
}
}
# use current guess at input tabbing to get input indentation level
#
# Patch to handle a common case of entabbed leading whitespace
# If the leading whitespace equals 4 spaces and we also have
# tabs, detab the input whitespace assuming 8 spaces per tab.
if ( length($input_tabstr) == 4 ) {
$leading_whitespace =~ s/^\t+/ /g;
}
if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
my $pos = 0;
while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
{
$pos += $len_tab;
$level++;
}
}
return ( $level, $msg );
}
# This is a currently unused debug routine
sub dump_functions {
my $fh = *STDOUT;
my ( $pkg, $sub );
foreach $pkg ( keys %is_user_function ) {
print $fh "\nnon-constant subs in package $pkg\n";
foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
my $msg = "";
if ( $is_block_list_function{$pkg}{$sub} ) {
$msg = 'block_list';
}
if ( $is_block_function{$pkg}{$sub} ) {
$msg = 'block';
}
print $fh "$sub $msg\n";
}
}
foreach $pkg ( keys %is_constant ) {
print $fh "\nconstants and constant subs in package $pkg\n";
foreach $sub ( keys %{ $is_constant{$pkg} } ) {
print $fh "$sub\n";
}
}
}
sub ones_count {
# count number of 1's in a string of 1's and 0's
# example: ones_count("010101010101") gives 6
return ( my $cis = $_[0] ) =~ tr/1/0/;
}
sub prepare_for_a_new_file {
# previous tokens needed to determine what to expect next
$last_nonblank_token = ';'; # the only possible starting state which
$last_nonblank_type = ';'; # will make a leading brace a code block
$last_nonblank_block_type = '';
# scalars for remembering statement types across multiple lines
$statement_type = ''; # '' or 'use' or 'sub..' or 'case..'
$in_attribute_list = 0;
# scalars for remembering where we are in the file
$current_package = "main";
$context = UNKNOWN_CONTEXT;
# hashes used to remember function information
%is_constant = (); # user-defined constants
%is_user_function = (); # user-defined functions
%user_function_prototype = (); # their prototypes
%is_block_function = ();
%is_block_list_function = ();
%saw_function_definition = ();
# variables used to track depths of various containers
# and report nesting errors
$paren_depth = 0;
$brace_depth = 0;
$square_bracket_depth = 0;
@current_depth[ 0 .. $#closing_brace_names ] =
(0) x scalar @closing_brace_names;
$total_depth = 0;
@total_depth = ();
@nesting_sequence_number[ 0 .. $#closing_brace_names ] =
( 0 .. $#closing_brace_names );
@current_sequence_number = ();
$paren_type[$paren_depth] = '';
$paren_semicolon_count[$paren_depth] = 0;
$paren_structural_type[$brace_depth] = '';
$brace_type[$brace_depth] = ';'; # identify opening brace as code block
$brace_structural_type[$brace_depth] = '';
$brace_statement_type[$brace_depth] = "";
$brace_context[$brace_depth] = UNKNOWN_CONTEXT;
$brace_package[$paren_depth] = $current_package;
$square_bracket_type[$square_bracket_depth] = '';
$square_bracket_structural_type[$square_bracket_depth] = '';
initialize_tokenizer_state();
}
{ # begin tokenize_this_line
use constant BRACE => 0;
use constant SQUARE_BRACKET => 1;
use constant PAREN => 2;
use constant QUESTION_COLON => 3;
# TV1: scalars for processing one LINE.
# Re-initialized on each entry to sub tokenize_this_line.
my (
$block_type, $container_type, $expecting,
$i, $i_tok, $input_line,
$input_line_number, $last_nonblank_i, $max_token_index,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
$type, $type_sequence, $indent_flag,
);
# TV2: refs to ARRAYS for processing one LINE
# Re-initialized on each call.
my $routput_token_list = []; # stack of output token indexes
my $routput_token_type = []; # token types
my $routput_block_type = []; # types of code block
my $routput_container_type = []; # paren types, such as if, elsif, ..
my $routput_type_sequence = []; # nesting sequential number
my $routput_indent_flag = []; #
# TV3: SCALARS for quote variables. These are initialized with a
# subroutine call and continually updated as lines are processed.
my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
# TV4: SCALARS for multi-line identifiers and
# statements. These are initialized with a subroutine call
# and continually updated as lines are processed.
my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
# TV5: SCALARS for tracking indentation level.
# Initialized once and continually updated as lines are
# processed.
my (
$nesting_token_string, $nesting_type_string,
$nesting_block_string, $nesting_block_flag,
$nesting_list_string, $nesting_list_flag,
$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
$in_statement_continuation, $level_in_tokenizer,
$slevel_in_tokenizer, $rslevel_stack,
);
# TV6: SCALARS for remembering several previous
# tokens. Initialized once and continually updated as
# lines are processed.
my (
$last_nonblank_container_type, $last_nonblank_type_sequence,
$last_last_nonblank_token, $last_last_nonblank_type,
$last_last_nonblank_block_type, $last_last_nonblank_container_type,
$last_last_nonblank_type_sequence, $last_nonblank_prototype,
);
# ----------------------------------------------------------------
# beginning of tokenizer variable access and manipulation routines
# ----------------------------------------------------------------
sub initialize_tokenizer_state {
# TV1: initialized on each call
# TV2: initialized on each call
# TV3:
$in_quote = 0;
$quote_type = 'Q';
$quote_character = "";
$quote_pos = 0;
$quote_depth = 0;
$quoted_string_1 = "";
$quoted_string_2 = "";
$allowed_quote_modifiers = "";
# TV4:
$id_scan_state = '';
$identifier = '';
$want_paren = "";
$indented_if_level = 0;
# TV5:
$nesting_token_string = "";
$nesting_type_string = "";
$nesting_block_string = '1'; # initially in a block
$nesting_block_flag = 1;
$nesting_list_string = '0'; # initially not in a list
$nesting_list_flag = 0; # initially not in a list
$ci_string_in_tokenizer = "";
$continuation_string_in_tokenizer = "0";
$in_statement_continuation = 0;
$level_in_tokenizer = 0;
$slevel_in_tokenizer = 0;
$rslevel_stack = [];
# TV6:
$last_nonblank_container_type = '';
$last_nonblank_type_sequence = '';
$last_last_nonblank_token = ';';
$last_last_nonblank_type = ';';
$last_last_nonblank_block_type = '';
$last_last_nonblank_container_type = '';
$last_last_nonblank_type_sequence = '';
$last_nonblank_prototype = "";
}
sub save_tokenizer_state {
my $rTV1 = [
$block_type, $container_type, $expecting,
$i, $i_tok, $input_line,
$input_line_number, $last_nonblank_i, $max_token_index,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
$type, $type_sequence, $indent_flag,
];
my $rTV2 = [
$routput_token_list, $routput_token_type,
$routput_block_type, $routput_container_type,
$routput_type_sequence, $routput_indent_flag,
];
my $rTV3 = [
$in_quote, $quote_type,
$quote_character, $quote_pos,
$quote_depth, $quoted_string_1,
$quoted_string_2, $allowed_quote_modifiers,
];
my $rTV4 =
[ $id_scan_state, $identifier, $want_paren, $indented_if_level ];
my $rTV5 = [
$nesting_token_string, $nesting_type_string,
$nesting_block_string, $nesting_block_flag,
$nesting_list_string, $nesting_list_flag,
$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
$in_statement_continuation, $level_in_tokenizer,
$slevel_in_tokenizer, $rslevel_stack,
];
my $rTV6 = [
$last_nonblank_container_type,
$last_nonblank_type_sequence,
$last_last_nonblank_token,
$last_last_nonblank_type,
$last_last_nonblank_block_type,
$last_last_nonblank_container_type,
$last_last_nonblank_type_sequence,
$last_nonblank_prototype,
];
return [ $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ];
}
sub restore_tokenizer_state {
my ($rstate) = @_;
my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};
(
$block_type, $container_type, $expecting,
$i, $i_tok, $input_line,
$input_line_number, $last_nonblank_i, $max_token_index,
$next_tok, $next_type, $peeked_ahead,
$prototype, $rhere_target_list, $rtoken_map,
$rtoken_type, $rtokens, $tok,
$type, $type_sequence, $indent_flag,
) = @{$rTV1};
(
$routput_token_list, $routput_token_type,
$routput_block_type, $routput_container_type,
$routput_type_sequence, $routput_type_sequence,
) = @{$rTV2};
(
$in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
) = @{$rTV3};
( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
@{$rTV4};
(
$nesting_token_string, $nesting_type_string,
$nesting_block_string, $nesting_block_flag,
$nesting_list_string, $nesting_list_flag,
$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
$in_statement_continuation, $level_in_tokenizer,
$slevel_in_tokenizer, $rslevel_stack,
) = @{$rTV5};
(
$last_nonblank_container_type,
$last_nonblank_type_sequence,
$last_last_nonblank_token,
$last_last_nonblank_type,
$last_last_nonblank_block_type,
$last_last_nonblank_container_type,
$last_last_nonblank_type_sequence,
$last_nonblank_prototype,
) = @{$rTV6};
}
sub get_indentation_level {
# patch to avoid reporting error if indented if is not terminated
if ($indented_if_level) { return $level_in_tokenizer - 1 }
return $level_in_tokenizer;
}
sub reset_indentation_level {
$level_in_tokenizer = $_[0];
$slevel_in_tokenizer = $_[0];
push @{$rslevel_stack}, $slevel_in_tokenizer;
}
sub peeked_ahead {
$peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
}
# ------------------------------------------------------------
# end of tokenizer variable access and manipulation routines
# ------------------------------------------------------------
# ------------------------------------------------------------
# beginning of various scanner interface routines
# ------------------------------------------------------------
sub scan_replacement_text {
# check for here-docs in replacement text invoked by
# a substitution operator with executable modifier 'e'.
#
# given:
# $replacement_text
# return:
# $rht = reference to any here-doc targets
my ($replacement_text) = @_;
# quick check
return undef unless ( $replacement_text =~ /<</ );
write_logfile_entry("scanning replacement text for here-doc targets\n");
# save the logger object for error messages
my $logger_object = $tokenizer_self->{_logger_object};
# localize all package variables
local (
$tokenizer_self, $last_nonblank_token,
$last_nonblank_type, $last_nonblank_block_type,
$statement_type, $in_attribute_list,
$current_package, $context,
%is_constant, %is_user_function,
%user_function_prototype, %is_block_function,
%is_block_list_function, %saw_function_definition,
$brace_depth, $paren_depth,
$square_bracket_depth, @current_depth,
@total_depth, $total_depth,
@nesting_sequence_number, @current_sequence_number,
@paren_type, @paren_semicolon_count,
@paren_structural_type, @brace_type,
@brace_structural_type, @brace_statement_type,
@brace_context, @brace_package,
@square_bracket_type, @square_bracket_structural_type,
@depth_array, @starting_line_of_current_depth,
@nested_ternary_flag,
);
# save all lexical variables
my $rstate = save_tokenizer_state();
_decrement_count(); # avoid error check for multiple tokenizers
# make a new tokenizer
my $rOpts = {};
my $rpending_logfile_message;
my $source_object =
Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
$rpending_logfile_message );
my $tokenizer = Perl::Tidy::Tokenizer->new(
source_object => $source_object,
logger_object => $logger_object,
starting_line_number => $input_line_number,
);
# scan the replacement text
1 while ( $tokenizer->get_line() );
# remove any here doc targets
my $rht = undef;
if ( $tokenizer_self->{_in_here_doc} ) {
$rht = [];
push @{$rht},
[
$tokenizer_self->{_here_doc_target},
$tokenizer_self->{_here_quote_character}
];
if ( $tokenizer_self->{_rhere_target_list} ) {
push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
$tokenizer_self->{_rhere_target_list} = undef;
}
$tokenizer_self->{_in_here_doc} = undef;
}
# now its safe to report errors
$tokenizer->report_tokenization_errors();
# restore all tokenizer lexical variables
restore_tokenizer_state($rstate);
# return the here doc targets
return $rht;
}
sub scan_bare_identifier {
( $i, $tok, $type, $prototype ) =
scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
$rtoken_map, $max_token_index );
}
sub scan_identifier {
( $i, $tok, $type, $id_scan_state, $identifier ) =
scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
$max_token_index, $expecting );
}
sub scan_id {
( $i, $tok, $type, $id_scan_state ) =
scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
$id_scan_state, $max_token_index );
}
sub scan_number {
my $number;
( $i, $type, $number ) =
scan_number_do( $input_line, $i, $rtoken_map, $type,
$max_token_index );
return $number;
}
# a sub to warn if token found where term expected
sub error_if_expecting_TERM {
if ( $expecting == TERM ) {
if ( $really_want_term{$last_nonblank_type} ) {
unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
$rtoken_type, $input_line );
1;
}
}
}
# a sub to warn if token found where operator expected
sub error_if_expecting_OPERATOR {
if ( $expecting == OPERATOR ) {
my $thing = defined $_[0] ? $_[0] : $tok;
unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
$rtoken_map, $rtoken_type, $input_line );
if ( $i_tok == 0 ) {
interrupt_logfile();
warning("Missing ';' above?\n");
resume_logfile();
}
1;
}
}
# ------------------------------------------------------------
# end scanner interfaces
# ------------------------------------------------------------
my %is_for_foreach;
@_ = qw(for foreach);
@is_for_foreach{@_} = (1) x scalar(@_);
my %is_my_our;
@_ = qw(my our);
@is_my_our{@_} = (1) x scalar(@_);
# These keywords may introduce blocks after parenthesized expressions,
# in the form:
# keyword ( .... ) { BLOCK }
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
my %is_blocktype_with_paren;
@_ = qw(if elsif unless while until for foreach switch case given when);
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
# ------------------------------------------------------------
# begin hash of code for handling most token types
# ------------------------------------------------------------
my $tokenization_code = {
# no special code for these types yet, but syntax checks
# could be added
## '!' => undef,
## '!=' => undef,
## '!~' => undef,
## '%=' => undef,
## '&&=' => undef,
## '&=' => undef,
## '+=' => undef,
## '-=' => undef,
## '..' => undef,
## '..' => undef,
## '...' => undef,
## '.=' => undef,
## '<<=' => undef,
## '<=' => undef,
## '<=>' => undef,
## '<>' => undef,
## '=' => undef,
## '==' => undef,
## '=~' => undef,
## '>=' => undef,
## '>>' => undef,
## '>>=' => undef,
## '\\' => undef,
## '^=' => undef,
## '|=' => undef,
## '||=' => undef,
## '//=' => undef,
## '~' => undef,
## '~~' => undef,
## '!~~' => undef,
'>' => sub {
error_if_expecting_TERM()
if ( $expecting == TERM );
},
'|' => sub {
error_if_expecting_TERM()
if ( $expecting == TERM );
},
'$' => sub {
# start looking for a scalar
error_if_expecting_OPERATOR("Scalar")
if ( $expecting == OPERATOR );
scan_identifier();
if ( $identifier eq '$^W' ) {
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
# Check for indentifier in indirect object slot
# (vorboard.pl, sort.t). Something like:
# /^(print|printf|sort|exec|system)$/
if (
$is_indirect_object_taker{$last_nonblank_token}
|| ( ( $last_nonblank_token eq '(' )
&& $is_indirect_object_taker{ $paren_type[$paren_depth] } )
|| ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
)
{
$type = 'Z';
}
},
'(' => sub {
++$paren_depth;
$paren_semicolon_count[$paren_depth] = 0;
if ($want_paren) {
$container_type = $want_paren;
$want_paren = "";
}
else {
$container_type = $last_nonblank_token;
# We can check for a syntax error here of unexpected '(',
# but this is going to get messy...
if (
$expecting == OPERATOR
# be sure this is not a method call of the form
# &method(...), $method->(..), &{method}(...),
# $ref[2](list) is ok & short for $ref[2]->(list)
# NOTE: at present, braces in something like &{ xxx }
# are not marked as a block, we might have a method call
&& $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
)
{
# ref: camel 3 p 703.
if ( $last_last_nonblank_token eq 'do' ) {
complain(
"do SUBROUTINE is deprecated; consider & or -> notation\n"
);
}
else {
# if this is an empty list, (), then it is not an
# error; for example, we might have a constant pi and
# invoke it with pi() or just pi;
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens,
$max_token_index );
if ( $next_nonblank_token ne ')' ) {
my $hint;
error_if_expecting_OPERATOR('(');
if ( $last_nonblank_type eq 'C' ) {
$hint =
"$last_nonblank_token has a void prototype\n";
}
elsif ( $last_nonblank_type eq 'i' ) {
if ( $i_tok > 0
&& $last_nonblank_token =~ /^\$/ )
{
$hint =
"Do you mean '$last_nonblank_token->(' ?\n";
}
}
if ($hint) {
interrupt_logfile();
warning($hint);
resume_logfile();
}
} ## end if ( $next_nonblank_token...
} ## end else [ if ( $last_last_nonblank_token...
} ## end if ( $expecting == OPERATOR...
}
$paren_type[$paren_depth] = $container_type;
( $type_sequence, $indent_flag ) =
increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
# propagate types down through nested parens
# for example: the second paren in 'if ((' would be structural
# since the first is.
if ( $last_nonblank_token eq '(' ) {
$type = $last_nonblank_type;
}
# We exclude parens as structural after a ',' because it
# causes subtle problems with continuation indentation for
# something like this, where the first 'or' will not get
# indented.
#
# assert(
# __LINE__,
# ( not defined $check )
# or ref $check
# or $check eq "new"
# or $check eq "old",
# );
#
# Likewise, we exclude parens where a statement can start
# because of problems with continuation indentation, like
# these:
#
# ($firstline =~ /^#\!.*perl/)
# and (print $File::Find::name, "\n")
# and (return 1);
#
# (ref($usage_fref) =~ /CODE/)
# ? &$usage_fref
# : (&blast_usage, &blast_params, &blast_general_params);
else {
$type = '{';
}
if ( $last_nonblank_type eq ')' ) {
warning(
"Syntax error? found token '$last_nonblank_type' then '('\n"
);
}
$paren_structural_type[$paren_depth] = $type;
},
')' => sub {
( $type_sequence, $indent_flag ) =
decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
if ( $paren_structural_type[$paren_depth] eq '{' ) {
$type = '}';
}
$container_type = $paren_type[$paren_depth];
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
my $num_sc = $paren_semicolon_count[$paren_depth];
if ( $num_sc > 0 && $num_sc != 2 ) {
warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
}
}
if ( $paren_depth > 0 ) { $paren_depth-- }
},
',' => sub {
if ( $last_nonblank_type eq ',' ) {
complain("Repeated ','s \n");
}
# patch for operator_expected: note if we are in the list (use.t)
if ( $statement_type eq 'use' ) { $statement_type = '_use' }
## FIXME: need to move this elsewhere, perhaps check after a '('
## elsif ($last_nonblank_token eq '(') {
## warning("Leading ','s illegal in some versions of perl\n");
## }
},
';' => sub {
$context = UNKNOWN_CONTEXT;
$statement_type = '';
# /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } )
{ # mark ; in for loop
# Be careful: we do not want a semicolon such as the
# following to be included:
#
# for (sort {strcoll($a,$b);} keys %investments) {
if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
&& $square_bracket_depth ==
$depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
{
$type = 'f';
$paren_semicolon_count[$paren_depth]++;
}
}
},
'"' => sub {
error_if_expecting_OPERATOR("String")
if ( $expecting == OPERATOR );
$in_quote = 1;
$type = 'Q';
$allowed_quote_modifiers = "";
},
"'" => sub {
error_if_expecting_OPERATOR("String")
if ( $expecting == OPERATOR );
$in_quote = 1;
$type = 'Q';
$allowed_quote_modifiers = "";
},
'`' => sub {
error_if_expecting_OPERATOR("String")
if ( $expecting == OPERATOR );
$in_quote = 1;
$type = 'Q';
$allowed_quote_modifiers = "";
},
'/' => sub {
my $is_pattern;
if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
my $msg;
( $is_pattern, $msg ) =
guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
$max_token_index );
if ($msg) {
write_diagnostics("DIVIDE:$msg\n");
write_logfile_entry($msg);
}
}
else { $is_pattern = ( $expecting == TERM ) }
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
$allowed_quote_modifiers = '[cgimosxp]';
}
else { # not a pattern; check for a /= token
if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
$i++;
$tok = '/=';
$type = $tok;
}
#DEBUG - collecting info on what tokens follow a divide
# for development of guessing algorithm
#if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
# #write_diagnostics( "DIVIDE? $input_line\n" );
#}
}
},
'{' => sub {
# if we just saw a ')', we will label this block with
# its type. We need to do this to allow sub
# code_block_type to determine if this brace starts a
# code block or anonymous hash. (The type of a paren
# pair is the preceding token, such as 'if', 'else',
# etc).
$container_type = "";
# ATTRS: for a '{' following an attribute list, reset
# things to look like we just saw the sub name
if ( $statement_type =~ /^sub/ ) {
$last_nonblank_token = $statement_type;
$last_nonblank_type = 'i';
$statement_type = "";
}
# patch for SWITCH/CASE: hide these keywords from an immediately
# following opening brace
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
&& $statement_type eq $last_nonblank_token )
{
$last_nonblank_token = ";";
}
elsif ( $last_nonblank_token eq ')' ) {
$last_nonblank_token = $paren_type[ $paren_depth + 1 ];
# defensive move in case of a nesting error (pbug.t)
# in which this ')' had no previous '('
# this nesting error will have been caught
if ( !defined($last_nonblank_token) ) {
$last_nonblank_token = 'if';
}
# check for syntax error here;
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
my $list = join( ' ', sort keys %is_blocktype_with_paren );
warning(
"syntax error at ') {', didn't see one of: $list\n");
}
}
# patch for paren-less for/foreach glitch, part 2.
# see note below under 'qw'
elsif ($last_nonblank_token eq 'qw'
&& $is_for_foreach{$want_paren} )
{
$last_nonblank_token = $want_paren;
if ( $last_last_nonblank_token eq $want_paren ) {
warning(
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
);
}
$want_paren = "";
}
# now identify which of the three possible types of
# curly braces we have: hash index container, anonymous
# hash reference, or code block.
# non-structural (hash index) curly brace pair
# get marked 'L' and 'R'
if ( is_non_structural_brace() ) {
$type = 'L';
# patch for SWITCH/CASE:
# allow paren-less identifier after 'when'
# if the brace is preceded by a space
if ( $statement_type eq 'when'
&& $last_nonblank_type eq 'i'
&& $last_last_nonblank_type eq 'k'
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
{
$type = '{';
$block_type = $statement_type;
}
}
# code and anonymous hash have the same type, '{', but are
# distinguished by 'block_type',
# which will be blank for an anonymous hash
else {
$block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
$max_token_index );
# patch to promote bareword type to function taking block
if ( $block_type
&& $last_nonblank_type eq 'w'
&& $last_nonblank_i >= 0 )
{
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
$routput_token_type->[$last_nonblank_i] = 'G';
}
}
# patch for SWITCH/CASE: if we find a stray opening block brace
# where we might accept a 'case' or 'when' block, then take it
if ( $statement_type eq 'case'
|| $statement_type eq 'when' )
{
if ( !$block_type || $block_type eq '}' ) {
$block_type = $statement_type;
}
}
}
$brace_type[ ++$brace_depth ] = $block_type;
$brace_package[$brace_depth] = $current_package;
( $type_sequence, $indent_flag ) =
increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
$brace_structural_type[$brace_depth] = $type;
$brace_context[$brace_depth] = $context;
$brace_statement_type[$brace_depth] = $statement_type;
},
'}' => sub {
$block_type = $brace_type[$brace_depth];
if ($block_type) { $statement_type = '' }
if ( defined( $brace_package[$brace_depth] ) ) {
$current_package = $brace_package[$brace_depth];
}
# can happen on brace error (caught elsewhere)
else {
}
( $type_sequence, $indent_flag ) =
decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
$type = 'R';
}
# propagate type information for 'do' and 'eval' blocks.
# This is necessary to enable us to know if an operator
# or term is expected next
if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
$tok = $brace_type[$brace_depth];
}
$context = $brace_context[$brace_depth];
$statement_type = $brace_statement_type[$brace_depth];
if ( $brace_depth > 0 ) { $brace_depth--; }
},
'&' => sub { # maybe sub call? start looking
# We have to check for sub call unless we are sure we
# are expecting an operator. This example from s2p
# got mistaken as a q operator in an early version:
# print BODY &q(<<'EOT');
if ( $expecting != OPERATOR ) {
scan_identifier();
}
else {
}
},
'<' => sub { # angle operator or less than?
if ( $expecting != OPERATOR ) {
( $i, $type ) =
find_angle_operator_termination( $input_line, $i, $rtoken_map,
$expecting, $max_token_index );
}
else {
}
},
'?' => sub { # ?: conditional or starting pattern?
my $is_pattern;
if ( $expecting == UNKNOWN ) {
my $msg;
( $is_pattern, $msg ) =
guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
$max_token_index );
if ($msg) { write_logfile_entry($msg) }
}
else { $is_pattern = ( $expecting == TERM ) }
if ($is_pattern) {
$in_quote = 1;
$type = 'Q';
$allowed_quote_modifiers = '[cgimosxp]';
}
else {
( $type_sequence, $indent_flag ) =
increase_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
}
},
'*' => sub { # typeglob, or multiply?
if ( $expecting == TERM ) {
scan_identifier();
}
else {
if ( $$rtokens[ $i + 1 ] eq '=' ) {
$tok = '*=';
$type = $tok;
$i++;
}
elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
$tok = '**';
$type = $tok;
$i++;
if ( $$rtokens[ $i + 1 ] eq '=' ) {
$tok = '**=';
$type = $tok;
$i++;
}
}
}
},
'.' => sub { # what kind of . ?
if ( $expecting != OPERATOR ) {
scan_number();
if ( $type eq '.' ) {
error_if_expecting_TERM()
if ( $expecting == TERM );
}
}
else {
}
},
':' => sub {
# if this is the first nonblank character, call it a label
# since perl seems to just swallow it
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
$type = 'J';
}
# ATTRS: check for a ':' which introduces an attribute list
# (this might eventually get its own token type)
elsif ( $statement_type =~ /^sub/ ) {
$type = 'A';
$in_attribute_list = 1;
}
# check for scalar attribute, such as
# my $foo : shared = 1;
elsif ($is_my_our{$statement_type}
&& $current_depth[QUESTION_COLON] == 0 )
{
$type = 'A';
$in_attribute_list = 1;
}
# otherwise, it should be part of a ?/: operator
else {
( $type_sequence, $indent_flag ) =
decrease_nesting_depth( QUESTION_COLON,
$$rtoken_map[$i_tok] );
if ( $last_nonblank_token eq '?' ) {
warning("Syntax error near ? :\n");
}
}
},
'+' => sub { # what kind of plus?
if ( $expecting == TERM ) {
my $number = scan_number();
# unary plus is safest assumption if not a number
if ( !defined($number) ) { $type = 'p'; }
}
elsif ( $expecting == OPERATOR ) {
}
else {
if ( $next_type eq 'w' ) { $type = 'p' }
}
},
'@' => sub {
error_if_expecting_OPERATOR("Array")
if ( $expecting == OPERATOR );
scan_identifier();
},
'%' => sub { # hash or modulo?
# first guess is hash if no following blank
if ( $expecting == UNKNOWN ) {
if ( $next_type ne 'b' ) { $expecting = TERM }
}
if ( $expecting == TERM ) {
scan_identifier();
}
},
'[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token;
( $type_sequence, $indent_flag ) =
increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
# It may seem odd, but structural square brackets have
# type '{' and '}'. This simplifies the indentation logic.
if ( !is_non_structural_brace() ) {
$type = '{';
}
$square_bracket_structural_type[$square_bracket_depth] = $type;
},
']' => sub {
( $type_sequence, $indent_flag ) =
decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
{
$type = '}';
}
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
},
'-' => sub { # what kind of minus?
if ( ( $expecting != OPERATOR )
&& $is_file_test_operator{$next_tok} )
{
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i + 1, $rtokens,
$max_token_index );
# check for a quoted word like "-w=>xx";
# it is sufficient to just check for a following '='
if ( $next_nonblank_token eq '=' ) {
$type = 'm';
}
else {
$i++;
$tok .= $next_tok;
$type = 'F';
}
}
elsif ( $expecting == TERM ) {
my $number = scan_number();
# maybe part of bareword token? unary is safest
if ( !defined($number) ) { $type = 'm'; }
}
elsif ( $expecting == OPERATOR ) {
}
else {
if ( $next_type eq 'w' ) {
$type = 'm';
}
}
},
'^' => sub {
# check for special variables like ${^WARNING_BITS}
if ( $expecting == TERM ) {
# FIXME: this should work but will not catch errors
# because we also have to be sure that previous token is
# a type character ($,@,%).
if ( $last_nonblank_token eq '{'
&& ( $next_tok =~ /^[A-Za-z_]/ ) )
{
if ( $next_tok eq 'W' ) {
$tokenizer_self->{_saw_perl_dash_w} = 1;
}
$tok = $tok . $next_tok;
$i = $i + 1;
$type = 'w';
}
else {
unless ( error_if_expecting_TERM() ) {
# Something like this is valid but strange:
# undef ^I;
complain("The '^' seems unusual here\n");
}
}
}
},
'::' => sub { # probably a sub call
scan_bare_identifier();
},
'<<' => sub { # maybe a here-doc?
return
unless ( $i < $max_token_index )
; # here-doc not possible if end of line
if ( $expecting != OPERATOR ) {
my ( $found_target, $here_doc_target, $here_quote_character,
$saw_error );
(
$found_target, $here_doc_target, $here_quote_character, $i,
$saw_error
)
= find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
$max_token_index );
if ($found_target) {
push @{$rhere_target_list},
[ $here_doc_target, $here_quote_character ];
$type = 'h';
if ( length($here_doc_target) > 80 ) {
my $truncated = substr( $here_doc_target, 0, 80 );
complain("Long here-target: '$truncated' ...\n");
}
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
complain(
"Unconventional here-target: '$here_doc_target'\n"
);
}
}
elsif ( $expecting == TERM ) {
unless ($saw_error) {
# shouldn't happen..
warning("Program bug; didn't find here doc target\n");
report_definite_bug();
}
}
}
else {
}
},
'->' => sub {
scan_identifier();
},
'++' => sub {
if ( $expecting == TERM ) { $type = 'pp' }
elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
}
},
'=>' => sub {
if ( $last_nonblank_type eq $tok ) {
complain("Repeated '=>'s \n");
}
if ( $statement_type eq 'use' ) { $statement_type = '_use' }
},
'--' => sub {
if ( $expecting == TERM ) { $type = 'mm' }
elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
}
},
'&&' => sub {
error_if_expecting_TERM()
if ( $expecting == TERM );
},
'||' => sub {
error_if_expecting_TERM()
if ( $expecting == TERM );
},
'//' => sub {
error_if_expecting_TERM()
if ( $expecting == TERM );
},
};
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
my %is_zero_continuation_block_type;
@_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
if elsif else unless while until for foreach switch case given when);
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
my %is_not_zero_continuation_block_type;
@_ = qw(sort grep map do eval);
@is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
my %is_logical_container;
@_ = qw(if elsif unless while and or err not && ! || for foreach);
@is_logical_container{@_} = (1) x scalar(@_);
my %is_binary_type;
@_ = qw(|| &&);
@is_binary_type{@_} = (1) x scalar(@_);
my %is_binary_keyword;
@_ = qw(and or err eq ne cmp);
@is_binary_keyword{@_} = (1) x scalar(@_);
my %is_opening_type;
@_ = qw" L { ( [ ";
@is_opening_type{@_} = (1) x scalar(@_);
my %is_closing_type;
@_ = qw" R } ) ] ";
@is_closing_type{@_} = (1) x scalar(@_);
my %is_redo_last_next_goto;
@_ = qw(redo last next goto);
@is_redo_last_next_goto{@_} = (1) x scalar(@_);
my %is_use_require;
@_ = qw(use require);
@is_use_require{@_} = (1) x scalar(@_);
my %is_sub_package;
@_ = qw(sub package);
@is_sub_package{@_} = (1) x scalar(@_);
my %is_format_END_DATA = (
'format' => '_in_format',
'__END__' => '_in_end',
'__DATA__' => '_in_data',
);
my %quote_modifiers = (
's' => '[cegimosxp]',
'y' => '[cds]',
'tr' => '[cds]',
'm' => '[cgimosxp]',
'qr' => '[imosxp]',
'q' => "",
'qq' => "",
'qw' => "",
'qx' => "",
);
my %quote_items = (
's' => 2,
'y' => 2,
'tr' => 2,
'm' => 1,
'qr' => 1,
'q' => 1,
'qq' => 1,
'qw' => 1,
'qx' => 1,
);
sub tokenize_this_line {
my $line_of_tokens = shift;
my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
$input_line_number = $line_of_tokens->{_line_number};
$line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
{
$tokenizer_self->{_in_pod} = 1;
return;
}
}
$input_line = $untrimmed_input_line;
chomp $input_line;
unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
$input_line =~ s/^\s*//; # trim left end
}
$tokenizer_self->{_line_text} = $input_line;
$routput_token_list = []; $routput_token_type = []; $routput_block_type = []; $routput_container_type = []; $routput_type_sequence = [];
$rhere_target_list = [];
$tok = $last_nonblank_token;
$type = $last_nonblank_type;
$prototype = $last_nonblank_prototype;
$last_nonblank_i = -1;
$block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence;
$indent_flag = 0;
$peeked_ahead = 0;
my $max_tokens_wanted = 0;
if ( !$in_quote && ( $input_line =~ /^ $max_tokens_wanted = 1 }
( $rtokens, $rtoken_map, $rtoken_type ) =
pre_tokenize( $input_line, $max_tokens_wanted );
$max_token_index = scalar(@$rtokens) - 1;
push( @$rtokens, ' ', ' ', ' ' ); push( @$rtoken_map, 0, 0, 0 ); push( @$rtoken_type, 'b', 'b', 'b' );
for $i ( 0 .. $max_token_index + 3 ) {
$routput_token_type->[$i] = "";
$routput_block_type->[$i] = "";
$routput_container_type->[$i] = "";
$routput_type_sequence->[$i] = "";
$routput_indent_flag->[$i] = 0;
}
$i = -1;
$i_tok = -1;
while ( ++$i <= $max_token_index ) {
if ($in_quote) { $type = $quote_type;
unless ( @{$routput_token_list} )
{ push( @{$routput_token_list}, $i );
$routput_token_type->[$i] = $type;
}
$tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
(
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string_1, $quoted_string_2
)
= do_quote(
$i, $in_quote, $quote_character,
$quote_pos, $quote_depth, $quoted_string_1,
$quoted_string_2, $rtokens, $rtoken_map,
$max_token_index
);
last if ($in_quote);
my $qs1 = $quoted_string_1;
my $qs2 = $quoted_string_2;
$quote_character = '';
$quote_pos = 0;
$quote_type = 'Q';
$quoted_string_1 = "";
$quoted_string_2 = "";
last if ( ++$i > $max_token_index );
if ($allowed_quote_modifiers) {
if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
my $str = $$rtokens[$i];
my $saw_modifier_e;
while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
my $pos = pos($str);
my $char = substr( $str, $pos - 1, 1 );
$saw_modifier_e ||= ( $char eq 'e' );
}
if ($saw_modifier_e) {
my $rht = scan_replacement_text($qs1);
if ($rht) {
push @{$rhere_target_list}, @{$rht};
$type = 'h';
if ( $i_tok < 0 ) {
my $ilast = $routput_token_list->[-1];
$routput_token_type->[$ilast] = $type;
}
}
}
if ( defined( pos($str) ) ) {
if ( pos($str) == length($str) ) {
last if ( ++$i > $max_token_index );
}
else {
warning(<<EOM);
Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
Please put a space between quote modifiers and trailing keywords.
EOM
last if ( ++$i > $max_token_index );
}
}
else {
write_logfile_entry(
"Note: found word $str at quote modifier location\n"
);
}
}
$allowed_quote_modifiers = "";
}
}
unless ( $tok =~ /^\s*$/ ) {
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
if ( $last_nonblank_token eq 'eq' ) {
complain("Should 'eq' be '==' here ?\n");
}
elsif ( $last_nonblank_token eq 'ne' ) {
complain("Should 'ne' be '!=' here ?\n");
}
}
$last_last_nonblank_token = $last_nonblank_token;
$last_last_nonblank_type = $last_nonblank_type;
$last_last_nonblank_block_type = $last_nonblank_block_type;
$last_last_nonblank_container_type =
$last_nonblank_container_type;
$last_last_nonblank_type_sequence =
$last_nonblank_type_sequence;
$last_nonblank_token = $tok;
$last_nonblank_type = $type;
$last_nonblank_prototype = $prototype;
$last_nonblank_block_type = $block_type;
$last_nonblank_container_type = $container_type;
$last_nonblank_type_sequence = $type_sequence;
$last_nonblank_i = $i_tok;
}
if ( $i_tok >= 0 ) {
$routput_token_type->[$i_tok] = $type;
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
$routput_indent_flag->[$i_tok] = $indent_flag;
}
my $pre_tok = $$rtokens[$i]; my $pre_type = $$rtoken_type[$i]; $tok = $pre_tok;
$type = $pre_type; $block_type = ""; $container_type = ""; $type_sequence = ""; $indent_flag = 0;
$prototype = ""; $i_tok = $i;
push( @{$routput_token_list}, $i_tok );
if ( $id_scan_state && $pre_type !~ /[b
if ( $id_scan_state =~ /^(sub|package)/ ) {
scan_id();
}
else {
scan_identifier();
}
last if ($id_scan_state);
next if ( ( $i > 0 ) || $type );
$type = $pre_type;
$tok = $pre_tok;
}
next if ( $type eq 'b' );
my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
my $test_tok = $tok . $$rtokens[ $i + 1 ];
my $combine_ok = $is_digraph{$test_tok};
if ($combine_ok) {
if ( $test_tok eq '//' ) {
my $next_type = $$rtokens[ $i + 1 ];
my $expecting =
operator_expected( $prev_type, $tok, $next_type );
$combine_ok = 0 unless ( $expecting == OPERATOR );
}
}
if (
$combine_ok
&& ( $test_tok ne '/=' ) && ( $test_tok ne 'x=' ) && ( $test_tok ne '**' ) && ( $test_tok ne '*=' ) )
{
$tok = $test_tok;
$i++;
$test_tok = $tok . $$rtokens[ $i + 1 ];
if ( $is_trigraph{$test_tok} ) {
$tok = $test_tok;
$i++;
}
}
$type = $tok;
$next_tok = $$rtokens[ $i + 1 ];
$next_type = $$rtoken_type[ $i + 1 ];
TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
local $" = ')(';
my @debug_list = (
$last_nonblank_token, $tok,
$next_tok, $brace_depth,
$brace_type[$brace_depth], $paren_depth,
$paren_type[$paren_depth]
);
print "TOKENIZE:(@debug_list)\n";
};
# turn off attribute list on first non-blank, non-bareword
if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
###############################################################
# We have the next token, $tok.
# Now we have to examine this token and decide what it is
# and define its $type
#
# section 1: bare words
###############################################################
if ( $pre_type eq 'w' ) {
$expecting = operator_expected( $prev_type, $tok, $next_type );
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# ATTRS: handle sub and variable attributes
if ($in_attribute_list) {
# treat bare word followed by open paren like qw(
if ( $next_nonblank_token eq '(' ) {
$in_quote = $quote_items{'q'};
$allowed_quote_modifiers = $quote_modifiers{'q'};
$type = 'q';
$quote_type = 'q';
next;
}
# handle bareword not followed by open paren
else {
$type = 'w';
next;
}
}
# quote a word followed by => operator
if ( $next_nonblank_token eq '=' ) {
if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
if ( $is_constant{$current_package}{$tok} ) {
$type = 'C';
}
elsif ( $is_user_function{$current_package}{$tok} ) {
$type = 'U';
$prototype =
$user_function_prototype{$current_package}{$tok};
}
elsif ( $tok =~ /^v\d+$/ ) {
$type = 'v';
report_v_string($tok);
}
else { $type = 'w' }
next;
}
}
# quote a bare word within braces..like xxx->{s}; note that we
# must be sure this is not a structural brace, to avoid
# mistaking {s} in the following for a quoted bare word:
# for(@[){s}bla}BLA}
# Also treat q in something like var{-q} as a bare word, not qoute operator
##if ( ( $last_nonblank_type eq 'L' )
## && ( $next_nonblank_token eq '}' ) )
if (
$next_nonblank_token eq '}'
&& (
$last_nonblank_type eq 'L'
|| ( $last_nonblank_type eq 'm'
&& $last_last_nonblank_type eq 'L' )
)
)
{
$type = 'w';
next;
}
# a bare word immediately followed by :: is not a keyword;
# use $tok_kw when testing for keywords to avoid a mistake
my $tok_kw = $tok;
if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
{
$tok_kw .= '::';
}
# handle operator x (now we know it isn't $x=)
if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
if ( $tok eq 'x' ) {
if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
$tok = 'x=';
$type = $tok;
$i++;
}
else {
$type = 'x';
}
}
# FIXME: Patch: mark something like x4 as an integer for now
# It gets fixed downstream. This is easier than
# splitting the pretoken.
else {
$type = 'n';
}
}
elsif ( ( $tok eq 'strict' )
and ( $last_nonblank_token eq 'use' ) )
{
$tokenizer_self->{_saw_use_strict} = 1;
scan_bare_identifier();
}
elsif ( ( $tok eq 'warnings' )
and ( $last_nonblank_token eq 'use' ) )
{
$tokenizer_self->{_saw_perl_dash_w} = 1;
# scan as identifier, so that we pick up something like:
# use warnings::register
scan_bare_identifier();
}
elsif (
$tok eq 'AutoLoader'
&& $tokenizer_self->{_look_for_autoloader}
&& (
$last_nonblank_token eq 'use'
# these regexes are from AutoSplit.pm, which we want
# to mimic
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
)
)
{
write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
$tokenizer_self->{_saw_autoloader} = 1;
$tokenizer_self->{_look_for_autoloader} = 0;
scan_bare_identifier();
}
elsif (
$tok eq 'SelfLoader'
&& $tokenizer_self->{_look_for_selfloader}
&& ( $last_nonblank_token eq 'use'
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
)
{
write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
$tokenizer_self->{_saw_selfloader} = 1;
$tokenizer_self->{_look_for_selfloader} = 0;
scan_bare_identifier();
}
elsif ( ( $tok eq 'constant' )
and ( $last_nonblank_token eq 'use' ) )
{
scan_bare_identifier();
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens,
$max_token_index );
if ($next_nonblank_token) {
if ( $is_keyword{$next_nonblank_token} ) {
warning(
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
);
}
# FIXME: could check for error in which next token is
# not a word (number, punctuation, ..)
else {
$is_constant{$current_package}
{$next_nonblank_token} = 1;
}
}
}
# various quote operators
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
if ( $expecting == OPERATOR ) {
# patch for paren-less for/foreach glitch, part 1
# perl will accept this construct as valid:
#
# foreach my $key qw\Uno Due Tres Quadro\ {
# print "Set $key\n";
# }
unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
{
error_if_expecting_OPERATOR();
}
}
$in_quote = $quote_items{$tok};
$allowed_quote_modifiers = $quote_modifiers{$tok};
# All quote types are 'Q' except possibly qw quotes.
# qw quotes are special in that they may generally be trimmed
# of leading and trailing whitespace. So they are given a
# separate type, 'q', unless requested otherwise.
$type =
( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
? 'q'
: 'Q';
$quote_type = $type;
}
# check for a statement label
elsif (
( $next_nonblank_token eq ':' )
&& ( $$rtokens[ $i_next + 1 ] ne ':' )
&& ( $i_next <= $max_token_index ) # colon on same line
&& label_ok()
)
{
if ( $tok !~ /[A-Z]/ ) {
push @{ $tokenizer_self->{_rlower_case_labels_at} },
$input_line_number;
}
$type = 'J';
$tok .= ':';
$i = $i_next;
next;
}
# 'sub' || 'package'
elsif ( $is_sub_package{$tok_kw} ) {
error_if_expecting_OPERATOR()
if ( $expecting == OPERATOR );
scan_id();
}
# Note on token types for format, __DATA__, __END__:
# It simplifies things to give these type ';', so that when we
# start rescanning we will be expecting a token of type TERM.
# We will switch to type 'k' before outputting the tokens.
elsif ( $is_format_END_DATA{$tok_kw} ) {
$type = ';'; # make tokenizer look for TERM next
$tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
last;
}
elsif ( $is_keyword{$tok_kw} ) {
$type = 'k';
# Since for and foreach may not be followed immediately
# by an opening paren, we have to remember which keyword
# is associated with the next '('
if ( $is_for_foreach{$tok} ) {
if ( new_statement_ok() ) {
$want_paren = $tok;
}
}
# recognize 'use' statements, which are special
elsif ( $is_use_require{$tok} ) {
$statement_type = $tok;
error_if_expecting_OPERATOR()
if ( $expecting == OPERATOR );
}
# remember my and our to check for trailing ": shared"
elsif ( $is_my_our{$tok} ) {
$statement_type = $tok;
}
# Check for misplaced 'elsif' and 'else', but allow isolated
# else or elsif blocks to be formatted. This is indicated
# by a last noblank token of ';'
elsif ( $tok eq 'elsif' ) {
if ( $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
/^(if|elsif|unless)$/ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless'\n"
);
}
}
elsif ( $tok eq 'else' ) {
# patched for SWITCH/CASE
if ( $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
/^(if|elsif|unless|case|when)$/ )
{
warning(
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
);
}
}
elsif ( $tok eq 'continue' ) {
if ( $last_nonblank_token ne ';'
&& $last_nonblank_block_type !~
/(^(\{|\}|;|while|until|for|foreach)|:$)/ )
{
# note: ';' '{' and '}' in list above
# because continues can follow bare blocks;
# ':' is labeled block
#
############################################
# NOTE: This check has been deactivated because
# continue has an alternative usage for given/when
# blocks in perl 5.10
## warning("'$tok' should follow a block\n");
############################################
}
}
# patch for SWITCH/CASE if 'case' and 'when are
# treated as keywords.
elsif ( $tok eq 'when' || $tok eq 'case' ) {
$statement_type = $tok; # next '{' is block
}
# indent trailing if/unless/while/until
# outdenting will be handled by later indentation loop
if ( $tok =~ /^(if|unless|while|until)$/
&& $next_nonblank_token ne '(' )
{
$indent_flag = 1;
}
}
# check for inline label following
# /^(redo|last|next|goto)$/
elsif (( $last_nonblank_type eq 'k' )
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
{
$type = 'j';
next;
}
# something else --
else {
scan_bare_identifier();
if ( $type eq 'w' ) {
if ( $expecting == OPERATOR ) {
# don't complain about possible indirect object
# notation.
# For example:
# package main;
# sub new($) { ... }
# $b = new A::; # calls A::new
# $c = new A; # same thing but suspicious
# This will call A::new but we have a 'new' in
# main:: which looks like a constant.
#
if ( $last_nonblank_type eq 'C' ) {
if ( $tok !~ /::$/ ) {
complain(<<EOM);
Expecting operator after '$last_nonblank_token' but found bare word '$tok'
Maybe indirectet object notation?
EOM
}
}
else {
error_if_expecting_OPERATOR("bareword");
}
}
# mark bare words immediately followed by a paren as
# functions
$next_tok = $$rtokens[ $i + 1 ];
if ( $next_tok eq '(' ) {
$type = 'U';
}
# underscore after file test operator is file handle
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
$type = 'Z';
}
# patch for SWITCH/CASE if 'case' and 'when are
# not treated as keywords:
if (
(
$tok eq 'case'
&& $brace_type[$brace_depth] eq 'switch'
)
|| ( $tok eq 'when'
&& $brace_type[$brace_depth] eq 'given' )
)
{
$statement_type = $tok; # next '{' is block
$type = 'k'; # for keyword syntax coloring
}
# patch for SWITCH/CASE if switch and given not keywords
# Switch is not a perl 5 keyword, but we will gamble
# and mark switch followed by paren as a keyword. This
# is only necessary to get html syntax coloring nice,
# and does not commit this as being a switch/case.
if ( $next_nonblank_token eq '('
&& ( $tok eq 'switch' || $tok eq 'given' ) )
{
$type = 'k'; # for keyword syntax coloring
}
}
}
}
###############################################################
# section 2: strings of digits
###############################################################
elsif ( $pre_type eq 'd' ) {
$expecting = operator_expected( $prev_type, $tok, $next_type );
error_if_expecting_OPERATOR("Number")
if ( $expecting == OPERATOR );
my $number = scan_number();
if ( !defined($number) ) {
# shouldn't happen - we should always get a number
warning("non-number beginning with digit--program bug\n");
report_definite_bug();
}
}
###############################################################
# section 3: all other tokens
###############################################################
else {
last if ( $tok eq '#' );
my $code = $tokenization_code->{$tok};
if ($code) {
$expecting =
operator_expected( $prev_type, $tok, $next_type );
$code->();
redo if $in_quote;
}
}
}
# -----------------------------
# end of main tokenization loop
# -----------------------------
if ( $i_tok >= 0 ) {
$routput_token_type->[$i_tok] = $type;
$routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence;
$routput_indent_flag->[$i_tok] = $indent_flag;
}
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
$last_last_nonblank_token = $last_nonblank_token;
$last_last_nonblank_type = $last_nonblank_type;
$last_last_nonblank_block_type = $last_nonblank_block_type;
$last_last_nonblank_container_type = $last_nonblank_container_type;
$last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
$last_nonblank_token = $tok;
$last_nonblank_type = $type;
$last_nonblank_block_type = $block_type;
$last_nonblank_container_type = $container_type;
$last_nonblank_type_sequence = $type_sequence;
$last_nonblank_prototype = $prototype;
}
# reset indentation level if necessary at a sub or package
# in an attempt to recover from a nesting error
if ( $level_in_tokenizer < 0 ) {
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
reset_indentation_level(0);
brace_warning("resetting level to 0 at $1 $2\n");
}
}
# all done tokenizing this line ...
# now prepare the final list of tokens and types
my @token_type = (); # stack of output token types
my @block_type = (); # stack of output code block types
my @container_type = (); # stack of output code container types
my @type_sequence = (); # stack of output type sequence numbers
my @tokens = (); # output tokens
my @levels = (); # structural brace levels of output tokens
my @slevels = (); # secondary nesting levels of output tokens
my @nesting_tokens = (); # string of tokens leading to this depth
my @nesting_types = (); # string of token types leading to this depth
my @nesting_blocks = (); # string of block types leading to this depth
my @nesting_lists = (); # string of list types leading to this depth
my @ci_string = (); # string needed to compute continuation indentation
my @container_environment = (); # BLOCK or LIST
my $container_environment = '';
my $im = -1; # previous $i value
my $num;
my $ci_string_sum = ones_count($ci_string_in_tokenizer);
# Computing Token Indentation
#
# The final section of the tokenizer forms tokens and also computes
# parameters needed to find indentation. It is much easier to do it
# in the tokenizer than elsewhere. Here is a brief description of how
# indentation is computed. Perl::Tidy computes indentation as the sum
# of 2 terms:
#
# (1) structural indentation, such as if/else/elsif blocks
# (2) continuation indentation, such as long parameter call lists.
#
# These are occasionally called primary and secondary indentation.
#
# Structural indentation is introduced by tokens of type '{', although
# the actual tokens might be '{', '(', or '['. Structural indentation
# is of two types: BLOCK and non-BLOCK. Default structural indentation
# is 4 characters if the standard indentation scheme is used.
#
# Continuation indentation is introduced whenever a line at BLOCK level
# is broken before its termination. Default continuation indentation
# is 2 characters in the standard indentation scheme.
#
# Both types of indentation may be nested arbitrarily deep and
# interlaced. The distinction between the two is somewhat arbitrary.
#
# For each token, we will define two variables which would apply if
# the current statement were broken just before that token, so that
# that token started a new line:
#
# $level = the structural indentation level,
# $ci_level = the continuation indentation level
#
# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
# assuming defaults. However, in some special cases it is customary
# to modify $ci_level from this strict value.
#
# The total structural indentation is easy to compute by adding and
# subtracting 1 from a saved value as types '{' and '}' are seen. The
# running value of this variable is $level_in_tokenizer.
#
# The total continuation is much more difficult to compute, and requires
# several variables. These veriables are:
#
# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
# each indentation level, if there are intervening open secondary
# structures just prior to that level.
# $continuation_string_in_tokenizer = a string of 1's and 0's indicating
# if the last token at that level is "continued", meaning that it
# is not the first token of an expression.
# $nesting_block_string = a string of 1's and 0's indicating, for each
# indentation level, if the level is of type BLOCK or not.
# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
# $nesting_list_string = a string of 1's and 0's indicating, for each
# indentation level, if it is is appropriate for list formatting.
# If so, continuation indentation is used to indent long list items.
# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
# @{$rslevel_stack} = a stack of total nesting depths at each
# structural indentation level, where "total nesting depth" means
# the nesting depth that would occur if every nesting token -- '{', '[',
# and '(' -- , regardless of context, is used to compute a nesting
# depth.
#my $nesting_block_flag = ($nesting_block_string =~ /1$/);
#my $nesting_list_flag = ($nesting_list_string =~ /1$/);
my ( $ci_string_i, $level_i, $nesting_block_string_i,
$nesting_list_string_i, $nesting_token_string_i,
$nesting_type_string_i, );
foreach $i ( @{$routput_token_list} )
{ # scan the list of pre-tokens indexes
# self-checking for valid token types
my $type = $routput_token_type->[$i];
my $forced_indentation_flag = $routput_indent_flag->[$i];
# See if we should undo the $forced_indentation_flag.
# Forced indentation after 'if', 'unless', 'while' and 'until'
# expressions without trailing parens is optional and doesn't
# always look good. It is usually okay for a trailing logical
# expression, but if the expression is a function call, code block,
# or some kind of list it puts in an unwanted extra indentation
# level which is hard to remove.
#
# Example where extra indentation looks ok:
# return 1
# if $det_a < 0 and $det_b > 0
# or $det_a > 0 and $det_b < 0;
#
# Example where extra indentation is not needed because
# the eval brace also provides indentation:
# print "not " if defined eval {
# reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
# };
#
# The following rule works fairly well:
# Undo the flag if the end of this line, or start of the next
# line, is an opening container token or a comma.
# This almost always works, but if not after another pass it will
# be stable.
if ( $forced_indentation_flag && $type eq 'k' ) {
my $ixlast = -1;
my $ilast = $routput_token_list->[$ixlast];
my $toklast = $routput_token_type->[$ilast];
if ( $toklast eq '#' ) {
$ixlast--;
$ilast = $routput_token_list->[$ixlast];
$toklast = $routput_token_type->[$ilast];
}
if ( $toklast eq 'b' ) {
$ixlast--;
$ilast = $routput_token_list->[$ixlast];
$toklast = $routput_token_type->[$ilast];
}
if ( $toklast =~ /^[\{,]$/ ) {
$forced_indentation_flag = 0;
}
else {
( $toklast, my $i_next ) =
find_next_nonblank_token( $max_token_index, $rtokens,
$max_token_index );
if ( $toklast =~ /^[\{,]$/ ) {
$forced_indentation_flag = 0;
}
}
}
# if we are already in an indented if, see if we should outdent
if ($indented_if_level) {
# don't try to nest trailing if's - shouldn't happen
if ( $type eq 'k' ) {
$forced_indentation_flag = 0;
}
# check for the normal case - outdenting at next ';'
elsif ( $type eq ';' ) {
if ( $level_in_tokenizer == $indented_if_level ) {
$forced_indentation_flag = -1;
$indented_if_level = 0;
}
}
# handle case of missing semicolon
elsif ( $type eq '}' ) {
if ( $level_in_tokenizer == $indented_if_level ) {
$indented_if_level = 0;
# TBD: This could be a subroutine call
$level_in_tokenizer--;
if ( @{$rslevel_stack} > 1 ) {
pop( @{$rslevel_stack} );
}
if ( length($nesting_block_string) > 1 )
{ # true for valid script
chop $nesting_block_string;
chop $nesting_list_string;
}
}
}
}
my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
$level_i = $level_in_tokenizer;
# This can happen by running perltidy on non-scripts
# although it could also be bug introduced by programming change.
# Perl silently accepts a 032 (^Z) and takes it as the end
if ( !$is_valid_token_type{$type} ) {
my $val = ord($type);
warning(
"unexpected character decimal $val ($type) in script\n");
$tokenizer_self->{_in_error} = 1;
}
# ----------------------------------------------------------------
# TOKEN TYPE PATCHES
# output __END__, __DATA__, and format as type 'k' instead of ';'
# to make html colors correct, etc.
my $fix_type = $type;
if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
# output anonymous 'sub' as keyword
if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
# -----------------------------------------------------------------
$nesting_token_string_i = $nesting_token_string;
$nesting_type_string_i = $nesting_type_string;
$nesting_block_string_i = $nesting_block_string;
$nesting_list_string_i = $nesting_list_string;
# set primary indentation levels based on structural braces
# Note: these are set so that the leading braces have a HIGHER
# level than their CONTENTS, which is convenient for indentation
# Also, define continuation indentation for each token.
if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
{
# use environment before updating
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
: "";
# if the difference between total nesting levels is not 1,
# there are intervening non-structural nesting types between
# this '{' and the previous unclosed '{'
my $intervening_secondary_structure = 0;
if ( @{$rslevel_stack} ) {
$intervening_secondary_structure =
$slevel_in_tokenizer - $rslevel_stack->[-1];
}
# Continuation Indentation
#
# Having tried setting continuation indentation both in the formatter and
# in the tokenizer, I can say that setting it in the tokenizer is much,
# much easier. The formatter already has too much to do, and can't
# make decisions on line breaks without knowing what 'ci' will be at
# arbitrary locations.
#
# But a problem with setting the continuation indentation (ci) here
# in the tokenizer is that we do not know where line breaks will actually
# be. As a result, we don't know if we should propagate continuation
# indentation to higher levels of structure.
#
# For nesting of only structural indentation, we never need to do this.
# For example, in a long if statement, like this
#
# if ( !$output_block_type[$i]
# && ($in_statement_continuation) )
# { <--outdented
# do_something();
# }
#
# the second line has ci but we do normally give the lines within the BLOCK
# any ci. This would be true if we had blocks nested arbitrarily deeply.
#
# But consider something like this, where we have created a break after
# an opening paren on line 1, and the paren is not (currently) a
# structural indentation token:
#
# my $file = $menubar->Menubutton(
# qw/-text File -underline 0 -menuitems/ => [
# [
# Cascade => '~View',
# -menuitems => [
# ...
#
# The second line has ci, so it would seem reasonable to propagate it
# down, giving the third line 1 ci + 1 indentation. This suggests the
# following rule, which is currently used to propagating ci down: if there
# are any non-structural opening parens (or brackets, or braces), before
# an opening structural brace, then ci is propagated down, and otherwise
# not. The variable $intervening_secondary_structure contains this
# information for the current token, and the string
# "$ci_string_in_tokenizer" is a stack of previous values of this
# variable.
# save the current states
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
$level_in_tokenizer++;
if ($forced_indentation_flag) {
# break BEFORE '?' when there is forced indentation
if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
if ( $type eq 'k' ) {
$indented_if_level = $level_in_tokenizer;
}
}
if ( $routput_block_type->[$i] ) {
$nesting_block_flag = 1;
$nesting_block_string .= '1';
}
else {
$nesting_block_flag = 0;
$nesting_block_string .= '0';
}
# we will use continuation indentation within containers
# which are not blocks and not logical expressions
my $bit = 0;
if ( !$routput_block_type->[$i] ) {
# propagate flag down at nested open parens
if ( $routput_container_type->[$i] eq '(' ) {
$bit = 1 if $nesting_list_flag;
}
# use list continuation if not a logical grouping
# /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
else {
$bit = 1
unless
$is_logical_container{ $routput_container_type->[$i]
};
}
}
$nesting_list_string .= $bit;
$nesting_list_flag = $bit;
$ci_string_in_tokenizer .=
( $intervening_secondary_structure != 0 ) ? '1' : '0';
$ci_string_sum = ones_count($ci_string_in_tokenizer);
$continuation_string_in_tokenizer .=
( $in_statement_continuation > 0 ) ? '1' : '0';
# Sometimes we want to give an opening brace continuation indentation,
# and sometimes not. For code blocks, we don't do it, so that the leading
# '{' gets outdented, like this:
#
# if ( !$output_block_type[$i]
# && ($in_statement_continuation) )
# { <--outdented
#
# For other types, we will give them continuation indentation. For example,
# here is how a list looks with the opening paren indented:
#
# @LoL =
# ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
# [ "homer", "marge", "bart" ], );
#
# This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
my $total_ci = $ci_string_sum;
if (
!$routput_block_type->[$i] # patch: skip for BLOCK
&& ($in_statement_continuation)
&& !( $forced_indentation_flag && $type eq ':' )
)
{
$total_ci += $in_statement_continuation
unless ( $ci_string_in_tokenizer =~ /1$/ );
}
$ci_string_i = $total_ci;
$in_statement_continuation = 0;
}
elsif ($type eq '}'
|| $type eq 'R'
|| $forced_indentation_flag < 0 )
{
# only a nesting error in the script would prevent popping here
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
$level_i = --$level_in_tokenizer;
# restore previous level values
if ( length($nesting_block_string) > 1 )
{ # true for valid script
chop $nesting_block_string;
$nesting_block_flag = ( $nesting_block_string =~ /1$/ );
chop $nesting_list_string;
$nesting_list_flag = ( $nesting_list_string =~ /1$/ );
chop $ci_string_in_tokenizer;
$ci_string_sum = ones_count($ci_string_in_tokenizer);
$in_statement_continuation =
chop $continuation_string_in_tokenizer;
# zero continuation flag at terminal BLOCK '}' which
# ends a statement.
if ( $routput_block_type->[$i] ) {
# ...These include non-anonymous subs
# note: could be sub ::abc { or sub 'abc
if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
# note: older versions of perl require the /gc modifier
# here or else the \G does not work.
if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
{
$in_statement_continuation = 0;
}
}
# ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval)
# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
elsif (
$is_zero_continuation_block_type{
$routput_block_type->[$i] } )
{
$in_statement_continuation = 0;
}
# ..but these are not terminal types:
# /^(sort|grep|map|do|eval)$/ )
elsif (
$is_not_zero_continuation_block_type{
$routput_block_type->[$i] } )
{
}
# ..and a block introduced by a label
# /^\w+\s*:$/gc ) {
elsif ( $routput_block_type->[$i] =~ /:$/ ) {
$in_statement_continuation = 0;
}
# user function with block prototype
else {
$in_statement_continuation = 0;
}
}
# If we are in a list, then
# we must set continuatoin indentation at the closing
# paren of something like this (paren after $check):
# assert(
# __LINE__,
# ( not defined $check )
# or ref $check
# or $check eq "new"
# or $check eq "old",
# );
elsif ( $tok eq ')' ) {
$in_statement_continuation = 1
if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
}
elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
}
# use environment after updating
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
: "";
$ci_string_i = $ci_string_sum + $in_statement_continuation;
$nesting_block_string_i = $nesting_block_string;
$nesting_list_string_i = $nesting_list_string;
}
# not a structural indentation type..
else {
$container_environment =
$nesting_block_flag ? 'BLOCK'
: $nesting_list_flag ? 'LIST'
: "";
# zero the continuation indentation at certain tokens so
# that they will be at the same level as its container. For
# commas, this simplifies the -lp indentation logic, which
# counts commas. For ?: it makes them stand out.
if ($nesting_list_flag) {
if ( $type =~ /^[,\?\:]$/ ) {
$in_statement_continuation = 0;
}
}
# be sure binary operators get continuation indentation
if (
$container_environment
&& ( $type eq 'k' && $is_binary_keyword{$tok}
|| $is_binary_type{$type} )
)
{
$in_statement_continuation = 1;
}
# continuation indentation is sum of any open ci from previous
# levels plus the current level
$ci_string_i = $ci_string_sum + $in_statement_continuation;
# update continuation flag ...
# if this isn't a blank or comment..
if ( $type ne 'b' && $type ne '#' ) {
# and we are in a BLOCK
if ($nesting_block_flag) {
# the next token after a ';' and label starts a new stmt
if ( $type eq ';' || $type eq 'J' ) {
$in_statement_continuation = 0;
}
# otherwise, we are continuing the current statement
else {
$in_statement_continuation = 1;
}
}
# if we are not in a BLOCK..
else {
# do not use continuation indentation if not list
# environment (could be within if/elsif clause)
if ( !$nesting_list_flag ) {
$in_statement_continuation = 0;
}
# otherwise, the next token after a ',' starts a new term
elsif ( $type eq ',' ) {
$in_statement_continuation = 0;
}
# otherwise, we are continuing the current term
else {
$in_statement_continuation = 1;
}
}
}
}
if ( $level_in_tokenizer < 0 ) {
unless ( $tokenizer_self->{_saw_negative_indentation} ) {
$tokenizer_self->{_saw_negative_indentation} = 1;
warning("Starting negative indentation\n");
}
}
# set secondary nesting levels based on all continment token types
# Note: these are set so that the nesting depth is the depth
# of the PREVIOUS TOKEN, which is convenient for setting
# the stength of token bonds
my $slevel_i = $slevel_in_tokenizer;
# /^[L\{\(\[]$/
if ( $is_opening_type{$type} ) {
$slevel_in_tokenizer++;
$nesting_token_string .= $tok;
$nesting_type_string .= $type;
}
# /^[R\}\)\]]$/
elsif ( $is_closing_type{$type} ) {
$slevel_in_tokenizer--;
my $char = chop $nesting_token_string;
if ( $char ne $matching_start_token{$tok} ) {
$nesting_token_string .= $char . $tok;
$nesting_type_string .= $type;
}
else {
chop $nesting_type_string;
}
}
push( @block_type, $routput_block_type->[$i] );
push( @ci_string, $ci_string_i );
push( @container_environment, $container_environment );
push( @container_type, $routput_container_type->[$i] );
push( @levels, $level_i );
push( @nesting_tokens, $nesting_token_string_i );
push( @nesting_types, $nesting_type_string_i );
push( @slevels, $slevel_i );
push( @token_type, $fix_type );
push( @type_sequence, $routput_type_sequence->[$i] );
push( @nesting_blocks, $nesting_block_string );
push( @nesting_lists, $nesting_list_string );
# now form the previous token
if ( $im >= 0 ) {
$num =
$$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
if ( $num > 0 ) {
push( @tokens,
substr( $input_line, $$rtoken_map[$im], $num ) );
}
}
$im = $i;
}
$num = length($input_line) - $$rtoken_map[$im]; # make the last token
if ( $num > 0 ) {
push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
}
$tokenizer_self->{_in_attribute_list} = $in_attribute_list;
$tokenizer_self->{_in_quote} = $in_quote;
$tokenizer_self->{_quote_target} =
$in_quote ? matching_end_token($quote_character) : "";
$tokenizer_self->{_rhere_target_list} = $rhere_target_list;
$line_of_tokens->{_rtoken_type} = \@token_type;
$line_of_tokens->{_rtokens} = \@tokens;
$line_of_tokens->{_rblock_type} = \@block_type;
$line_of_tokens->{_rcontainer_type} = \@container_type;
$line_of_tokens->{_rcontainer_environment} = \@container_environment;
$line_of_tokens->{_rtype_sequence} = \@type_sequence;
$line_of_tokens->{_rlevels} = \@levels;
$line_of_tokens->{_rslevels} = \@slevels;
$line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
$line_of_tokens->{_rci_levels} = \@ci_string;
$line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
return;
}
} # end tokenize_this_line
#########i#############################################################
# Tokenizer routines which assist in identifying token types
#######################################################################
sub operator_expected {
# Many perl symbols have two or more meanings. For example, '<<'
# can be a shift operator or a here-doc operator. The
# interpretation of these symbols depends on the current state of
# the tokenizer, which may either be expecting a term or an
# operator. For this example, a << would be a shift if an operator
# is expected, and a here-doc if a term is expected. This routine
# is called to make this decision for any current token. It returns
# one of three possible values:
#
# OPERATOR - operator expected (or at least, not a term)
# UNKNOWN - can't tell
# TERM - a term is expected (or at least, not an operator)
#
# The decision is based on what has been seen so far. This
# information is stored in the "$last_nonblank_type" and
# "$last_nonblank_token" variables. For example, if the
# $last_nonblank_type is '=~', then we are expecting a TERM, whereas
# if $last_nonblank_type is 'n' (numeric), we are expecting an
# OPERATOR.
#
# If a UNKNOWN is returned, the calling routine must guess. A major
# goal of this tokenizer is to minimize the possiblity of returning
# UNKNOWN, because a wrong guess can spoil the formatting of a
# script.
#
# adding NEW_TOKENS: it is critically important that this routine be
# updated to allow it to determine if an operator or term is to be
# expected after the new token. Doing this simply involves adding
# the new token character to one of the regexes in this routine or
# to one of the hash lists
# that it uses, which are initialized in the BEGIN section.
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
# $statement_type
my ( $prev_type, $tok, $next_type ) = @_;
my $op_expected = UNKNOWN;
#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";
# Note: function prototype is available for token type 'U' for future
# program development. It contains the leading and trailing parens,
# and no blanks. It might be used to eliminate token type 'C', for
# example (prototype = '()'). Thus:
# if ($last_nonblank_type eq 'U') {
# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
# }
# A possible filehandle (or object) requires some care...
if ( $last_nonblank_type eq 'Z' ) {
# angle.t
if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
$op_expected = UNKNOWN;
}
# For possible file handle like "$a", Perl uses weird parsing rules.
# For example:
# print $a/2,"/hi"; - division
# print $a / 2,"/hi"; - division
# print $a/ 2,"/hi"; - division
# print $a /2,"/hi"; - pattern (and error)!
elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
$op_expected = TERM;
}
# Note when an operation is being done where a
# filehandle might be expected, since a change in whitespace
# could change the interpretation of the statement.
else {
if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
complain("operator in print statement not recommended\n");
$op_expected = OPERATOR;
}
}
}
# handle something after 'do' and 'eval'
elsif ( $is_block_operator{$last_nonblank_token} ) {
# something like $a = eval "expression";
# ^
if ( $last_nonblank_type eq 'k' ) {
$op_expected = TERM; # expression or list mode following keyword
}
# something like $a = do { BLOCK } / 2;
# ^
else {
$op_expected = OPERATOR; # block mode following }
}
}
# handle bare word..
elsif ( $last_nonblank_type eq 'w' ) {
# unfortunately, we can't tell what type of token to expect next
# after most bare words
$op_expected = UNKNOWN;
}
# operator, but not term possible after these types
# Note: moved ')' from type to token because parens in list context
# get marked as '{' '}' now. This is a minor glitch in the following:
# my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
#
elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
|| ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
{
$op_expected = OPERATOR;
# in a 'use' statement, numbers and v-strings are not true
# numbers, so to avoid incorrect error messages, we will
# mark them as unknown for now (use.t)
# TODO: it would be much nicer to create a new token V for VERSION
# number in a use statement. Then this could be a check on type V
# and related patches which change $statement_type for '=>'
# and ',' could be removed. Further, it would clean things up to
# scan the 'use' statement with a separate subroutine.
if ( ( $statement_type eq 'use' )
&& ( $last_nonblank_type =~ /^[nv]$/ ) )
{
$op_expected = UNKNOWN;
}
}
# no operator after many keywords, such as "die", "warn", etc
elsif ( $expecting_term_token{$last_nonblank_token} ) {
# patch for dor.t (defined or).
# perl functions which may be unary operators
# TODO: This list is incomplete, and these should be put
# into a hash.
if ( $tok eq '/'
&& $next_type eq '/'
&& $last_nonblank_type eq 'k'
&& $last_nonblank_token =~ /^eof|undef|shift|pop$/ )
{
$op_expected = OPERATOR;
}
else {
$op_expected = TERM;
}
}
# no operator after things like + - ** (i.e., other operators)
elsif ( $expecting_term_types{$last_nonblank_type} ) {
$op_expected = TERM;
}
# a few operators, like "time", have an empty prototype () and so
# take no parameters but produce a value to operate on
elsif ( $expecting_operator_token{$last_nonblank_token} ) {
$op_expected = OPERATOR;
}
# post-increment and decrement produce values to be operated on
elsif ( $expecting_operator_types{$last_nonblank_type} ) {
$op_expected = OPERATOR;
}
# no value to operate on after sub block
elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }
# a right brace here indicates the end of a simple block.
# all non-structural right braces have type 'R'
# all braces associated with block operator keywords have been given those
# keywords as "last_nonblank_token" and caught above.
# (This statement is order dependent, and must come after checking
# $last_nonblank_token).
elsif ( $last_nonblank_type eq '}' ) {
# patch for dor.t (defined or).
if ( $tok eq '/'
&& $next_type eq '/'
&& $last_nonblank_token eq ']' )
{
$op_expected = OPERATOR;
}
else {
$op_expected = TERM;
}
}
# something else..what did I forget?
else {
# collecting diagnostics on unknown operator types..see what was missed
$op_expected = UNKNOWN;
write_diagnostics(
"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
);
}
TOKENIZER_DEBUG_FLAG_EXPECT && do {
print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
};
return $op_expected;
}
sub new_statement_ok {
# return true if the current token can start a new statement
# USES GLOBAL VARIABLES: $last_nonblank_type
return label_ok() # a label would be ok here
|| $last_nonblank_type eq 'J'; # or we follow a label
}
sub label_ok {
# Decide if a bare word followed by a colon here is a label
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
# $brace_depth, @brace_type
# if it follows an opening or closing code block curly brace..
if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
&& $last_nonblank_type eq $last_nonblank_token )
{
# it is a label if and only if the curly encloses a code block
return $brace_type[$brace_depth];
}
# otherwise, it is a label if and only if it follows a ';'
# (real or fake)
else {
return ( $last_nonblank_type eq ';' );
}
}
sub code_block_type {
# Decide if this is a block of code, and its type.
# Must be called only when $type = $token = '{'
# The problem is to distinguish between the start of a block of code
# and the start of an anonymous hash reference
# Returns "" if not code block, otherwise returns 'last_nonblank_token'
# to indicate the type of code block. (For example, 'last_nonblank_token'
# might be 'if' for an if block, 'else' for an else block, etc).
# USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
# $last_nonblank_block_type, $brace_depth, @brace_type
# handle case of multiple '{'s
# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";
my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
if ( $last_nonblank_token eq '{'
&& $last_nonblank_type eq $last_nonblank_token )
{
# opening brace where a statement may appear is probably
# a code block but might be and anonymous hash reference
if ( $brace_type[$brace_depth] ) {
return decide_if_code_block( $i, $rtokens, $rtoken_type,
$max_token_index );
}
# cannot start a code block within an anonymous hash
else {
return "";
}
}
elsif ( $last_nonblank_token eq ';' ) {
# an opening brace where a statement may appear is probably
# a code block but might be and anonymous hash reference
return decide_if_code_block( $i, $rtokens, $rtoken_type,
$max_token_index );
}
# handle case of '}{'
elsif ($last_nonblank_token eq '}'
&& $last_nonblank_type eq $last_nonblank_token )
{
# a } { situation ...
# could be hash reference after code block..(blktype1.t)
if ($last_nonblank_block_type) {
return decide_if_code_block( $i, $rtokens, $rtoken_type,
$max_token_index );
}
# must be a block if it follows a closing hash reference
else {
return $last_nonblank_token;
}
}
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub is_non_structural_brace.
# elsif ( $last_nonblank_type eq 't' ) {
# return $last_nonblank_token;
# }
# brace after label:
elsif ( $last_nonblank_type eq 'J' ) {
return $last_nonblank_token;
}
# otherwise, look at previous token. This must be a code block if
# it follows any of these:
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
elsif ( $is_code_block_token{$last_nonblank_token} ) {
return $last_nonblank_token;
}
# or a sub definition
elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
&& $last_nonblank_token =~ /^sub\b/ )
{
return $last_nonblank_token;
}
# user-defined subs with block parameters (like grep/map/eval)
elsif ( $last_nonblank_type eq 'G' ) {
return $last_nonblank_token;
}
# check bareword
elsif ( $last_nonblank_type eq 'w' ) {
return decide_if_code_block( $i, $rtokens, $rtoken_type,
$max_token_index );
}
# anything else must be anonymous hash reference
else {
return "";
}
}
sub decide_if_code_block {
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
# we are at a '{' where a statement may appear.
# We must decide if this brace starts an anonymous hash or a code
# block.
# return "" if anonymous hash, and $last_nonblank_token otherwise
# initialize to be code BLOCK
my $code_block_type = $last_nonblank_token;
# Check for the common case of an empty anonymous hash reference:
# Maybe something like sub { { } }
if ( $next_nonblank_token eq '}' ) {
$code_block_type = "";
}
else {
# To guess if this '{' is an anonymous hash reference, look ahead
# and test as follows:
#
# it is a hash reference if next come:
# - a string or digit followed by a comma or =>
# - bareword followed by =>
# otherwise it is a code block
#
# Examples of anonymous hash ref:
# {'aa',};
# {1,2}
#
# Examples of code blocks:
# {1; print "hello\n", 1;}
# {$a,1};
# We are only going to look ahead one more (nonblank/comment) line.
# Strange formatting could cause a bad guess, but that's unlikely.
my @pre_types = @$rtoken_type[ $i + 1 .. $max_token_index ];
my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(20); # 20 is arbitrary but
# generous, and prevents
# wasting lots of
# time in mangled files
if ( defined($rpre_types) && @$rpre_types ) {
push @pre_types, @$rpre_types;
push @pre_tokens, @$rpre_tokens;
}
# put a sentinal token to simplify stopping the search
push @pre_types, '}';
my $jbeg = 0;
$jbeg = 1 if $pre_types[0] eq 'b';
# first look for one of these
# - bareword
# - bareword with leading -
# - digit
# - quoted string
my $j = $jbeg;
if ( $pre_types[$j] =~ /^[\'\"]/ ) {
# find the closing quote; don't worry about escapes
my $quote_mark = $pre_types[$j];
for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
if ( $pre_types[$k] eq $quote_mark ) {
$j = $k + 1;
my $next = $pre_types[$j];
last;
}
}
}
elsif ( $pre_types[$j] eq 'd' ) {
$j++;
}
elsif ( $pre_types[$j] eq 'w' ) {
unless ( $is_keyword{ $pre_tokens[$j] } ) {
$j++;
}
}
elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
$j++;
}
if ( $j > $jbeg ) {
$j++ if $pre_types[$j] eq 'b';
# it's a hash ref if a comma or => follow next
if ( $pre_types[$j] eq ','
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
{
$code_block_type = "";
}
}
}
return $code_block_type;
}
sub unexpected {
# report unexpected token type and show where it is
# USES GLOBAL VARIABLES: $tokenizer_self
my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
$rpretoken_type, $input_line )
= @_;
if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
my $msg = "found $found where $expecting expected";
my $pos = $$rpretoken_map[$i_tok];
interrupt_logfile();
my $input_line_number = $tokenizer_self->{_last_line_number};
my ( $offset, $numbered_line, $underline ) =
make_numbered_line( $input_line_number, $input_line, $pos );
$underline = write_on_underline( $underline, $pos - $offset, '^' );
my $trailer = "";
if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
my $pos_prev = $$rpretoken_map[$last_nonblank_i];
my $num;
if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
$num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
}
else {
$num = $pos - $pos_prev;
}
if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }
$underline =
write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
$trailer = " (previous token underlined)";
}
warning( $numbered_line . "\n" );
warning( $underline . "\n" );
warning( $msg . $trailer . "\n" );
resume_logfile();
}
}
sub is_non_structural_brace {
# Decide if a brace or bracket is structural or non-structural
# by looking at the previous token and type
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token
# EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
# Tentatively deactivated because it caused the wrong operator expectation
# for this code:
# $user = @vars[1] / 100;
# Must update sub operator_expected before re-implementing.
# if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
# return 0;
# }
# NOTE: braces after type characters start code blocks, but for
# simplicity these are not identified as such. See also
# sub code_block_type
# if ($last_nonblank_type eq 't') {return 0}
# otherwise, it is non-structural if it is decorated
# by type information.
# For example, the '{' here is non-structural: ${xxx}
(
$last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/
# or if we follow a hash or array closing curly brace or bracket
# For example, the second '{' in this is non-structural: $a{'x'}{'y'}
# because the first '}' would have been given type 'R'
|| $last_nonblank_type =~ /^([R\]])$/
);
}
#########i#############################################################
# Tokenizer routines for tracking container nesting depths
#######################################################################
# The following routines keep track of nesting depths of the nesting
# types, ( [ { and ?. This is necessary for determining the indentation
# level, and also for debugging programs. Not only do they keep track of
# nesting depths of the individual brace types, but they check that each
# of the other brace types is balanced within matching pairs. For
# example, if the program sees this sequence:
#
# { ( ( ) }
#
# then it can determine that there is an extra left paren somewhere
# between the { and the }. And so on with every other possible
# combination of outer and inner brace types. For another
# example:
#
# ( [ ..... ] ] )
#
# which has an extra ] within the parens.
#
# The brace types have indexes 0 .. 3 which are indexes into
# the matrices.
#
# The pair ? : are treated as just another nesting type, with ? acting
# as the opening brace and : acting as the closing brace.
#
# The matrix
#
# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
#
# saves the nesting depth of brace type $b (where $b is either of the other
# nesting types) when brace type $a enters a new depth. When this depth
# decreases, a check is made that the current depth of brace types $b is
# unchanged, or otherwise there must have been an error. This can
# be very useful for localizing errors, particularly when perl runs to
# the end of a large file (such as this one) and announces that there
# is a problem somewhere.
#
# A numerical sequence number is maintained for every nesting type,
# so that each matching pair can be uniquely identified in a simple
# way.
sub increase_nesting_depth {
my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
my $bb;
$current_depth[$aa]++;
$total_depth++;
$total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
# Sequence numbers increment by number of items. This keeps
# a unique set of numbers but still allows the relative location
# of any type to be determined.
$nesting_sequence_number[$aa] += scalar(@closing_brace_names);
my $seqno = $nesting_sequence_number[$aa];
$current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;
$starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
[ $input_line_number, $input_line, $pos ];
for $bb ( 0 .. $#closing_brace_names ) {
next if ( $bb == $aa );
$depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
}
# set a flag for indenting a nested ternary statement
my $indent = 0;
if ( $aa == QUESTION_COLON ) {
$nested_ternary_flag[ $current_depth[$aa] ] = 0;
if ( $current_depth[$aa] > 1 ) {
if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {
my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
if ( $pdepth == $total_depth - 1 ) {
$indent = 1;
$nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
}
}
}
}
return ( $seqno, $indent );
}
sub decrease_nesting_depth {
my ( $aa, $pos ) = @_;
# USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
# @current_sequence_number, @depth_array, @starting_line_of_current_depth
my $bb;
my $seqno = 0;
my $input_line_number = $tokenizer_self->{_last_line_number};
my $input_line = $tokenizer_self->{_line_text};
my $outdent = 0;
$total_depth--;
if ( $current_depth[$aa] > 0 ) {
# set a flag for un-indenting after seeing a nested ternary statement
$seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
if ( $aa == QUESTION_COLON ) {
$outdent = $nested_ternary_flag[ $current_depth[$aa] ];
}
# check that any brace types $bb contained within are balanced
for $bb ( 0 .. $#closing_brace_names ) {
next if ( $bb == $aa );
unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
$current_depth[$bb] )
{
my $diff =
$current_depth[$bb] -
$depth_array[$aa][$bb][ $current_depth[$aa] ];
# don't whine too many times
my $saw_brace_error = get_saw_brace_error();
if (
$saw_brace_error <= MAX_NAG_MESSAGES
# if too many closing types have occured, we probably
# already caught this error
&& ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
)
{
interrupt_logfile();
my $rsl =
$starting_line_of_current_depth[$aa]
[ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $rel = [ $input_line_number, $input_line, $pos ];
my $el = $$rel[0];
my ($ess);
if ( $diff == 1 || $diff == -1 ) {
$ess = '';
}
else {
$ess = 's';
}
my $bname =
( $diff > 0 )
? $opening_brace_names[$bb]
: $closing_brace_names[$bb];
write_error_indicator_pair( @$rsl, '^' );
my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM
if ( $diff > 0 ) {
my $rml =
$starting_line_of_current_depth[$bb]
[ $current_depth[$bb] ];
my $ml = $$rml[0];
$msg .=
" The most recent un-matched $bname is on line $ml\n";
write_error_indicator_pair( @$rml, '^' );
}
write_error_indicator_pair( @$rel, '^' );
warning($msg);
resume_logfile();
}
increment_brace_error();
}
}
$current_depth[$aa]--;
}
else {
my $saw_brace_error = get_saw_brace_error();
if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
}
increment_brace_error();
}
return ( $seqno, $outdent );
}
sub check_final_nesting_depths {
my ($aa);
# USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth
for $aa ( 0 .. $#closing_brace_names ) {
if ( $current_depth[$aa] ) {
my $rsl =
$starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
my $sl = $$rsl[0];
my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
indicate_error( $msg, @$rsl, '^' );
increment_brace_error();
}
}
}
#########i#############################################################
# Tokenizer routines for looking ahead in input stream
#######################################################################
sub peek_ahead_for_n_nonblank_pre_tokens {
# returns next n pretokens if they exist
# returns undef's if hits eof without seeing any pretokens
# USES GLOBAL VARIABLES: $tokenizer_self
my $max_pretokens = shift;
my $line;
my $i = 0;
my ( $rpre_tokens, $rmap, $rpre_types );
while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
{
$line =~ s/^\s*//; # trim leading blanks
next if ( length($line) <= 0 ); # skip blank
next if ( $line =~ /^#/ ); # skip comment
( $rpre_tokens, $rmap, $rpre_types ) =
pre_tokenize( $line, $max_pretokens );
last;
}
return ( $rpre_tokens, $rpre_types );
}
# look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {
# USES GLOBAL VARIABLES: $tokenizer_self
my ( $rtokens, $max_token_index ) = @_;
my $line;
my $i = 0;
while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
{
$line =~ s/^\s*//; # trim leading blanks
next if ( length($line) <= 0 ); # skip blank
next if ( $line =~ /^#/ ); # skip comment
my ( $rtok, $rmap, $rtype ) =
pre_tokenize( $line, 2 ); # only need 2 pre-tokens
my $j = $max_token_index + 1;
my $tok;
foreach $tok (@$rtok) {
last if ( $tok =~ "\n" );
$$rtokens[ ++$j ] = $tok;
}
last;
}
return $rtokens;
}
#########i#############################################################
# Tokenizer guessing routines for ambiguous situations
#######################################################################
sub guess_if_pattern_or_conditional {
# this routine is called when we have encountered a ? following an
# unknown bareword, and we must decide if it starts a pattern or not
# input parameters:
# $i - token index of the ? starting possible pattern
# output parameters:
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern
# msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0;
my $msg = "guessing that ? after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) {
$msg .= "conditional (no end to pattern found on the line)\n";
}
else {
my $ibeg = $i;
$i = $ibeg + 1;
my $next_token = $$rtokens[$i]; # first token after ?
# look for a possible ending ? on this line..
my $in_quote = 1;
my $quote_depth = 0;
my $quote_character = '';
my $quote_pos = 0;
my $quoted_string;
(
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string
)
= follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
$quote_pos, $quote_depth, $max_token_index );
if ($in_quote) {
# we didn't find an ending ? on this line,
# so we bias towards conditional
$is_pattern = 0;
$msg .= "conditional (no ending ? on this line)\n";
# we found an ending ?, so we bias towards a pattern
}
else {
if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
$is_pattern = 1;
$msg .= "pattern (found ending ? and pattern expected)\n";
}
else {
$msg .= "pattern (uncertain, but found ending ?)\n";
}
}
}
return ( $is_pattern, $msg );
}
sub guess_if_pattern_or_division {
# this routine is called when we have encountered a / following an
# unknown bareword, and we must decide if it starts a pattern or is a
# division
# input parameters:
# $i - token index of the / starting possible pattern
# output parameters:
# $is_pattern = 0 if probably division, =1 if probably a pattern
# msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0;
my $msg = "guessing that / after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) {
"division (no end to pattern found on the line)\n";
}
else {
my $ibeg = $i;
my $divide_expected =
numerator_expected( $i, $rtokens, $max_token_index );
$i = $ibeg + 1;
my $next_token = $$rtokens[$i]; # first token after slash
# look for a possible ending / on this line..
my $in_quote = 1;
my $quote_depth = 0;
my $quote_character = '';
my $quote_pos = 0;
my $quoted_string;
(
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string
)
= follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
$quote_pos, $quote_depth, $max_token_index );
if ($in_quote) {
# we didn't find an ending / on this line,
# so we bias towards division
if ( $divide_expected >= 0 ) {
$is_pattern = 0;
$msg .= "division (no ending / on this line)\n";
}
else {
$msg = "multi-line pattern (division not possible)\n";
$is_pattern = 1;
}
}
# we found an ending /, so we bias towards a pattern
else {
if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
if ( $divide_expected >= 0 ) {
if ( $i - $ibeg > 60 ) {
$msg .= "division (matching / too distant)\n";
$is_pattern = 0;
}
else {
$msg .= "pattern (but division possible too)\n";
$is_pattern = 1;
}
}
else {
$is_pattern = 1;
$msg .= "pattern (division not possible)\n";
}
}
else {
if ( $divide_expected >= 0 ) {
$is_pattern = 0;
$msg .= "division (pattern not possible)\n";
}
else {
$is_pattern = 1;
$msg .=
"pattern (uncertain, but division would not work here)\n";
}
}
}
}
return ( $is_pattern, $msg );
}
# try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token)
# returns 1 if it is probably a here doc, 0 if not
sub guess_if_here_doc {
# This is how many lines we will search for a target as part of the
# guessing strategy. It is a constant because there is probably
# little reason to change it.
# USES GLOBAL VARIABLES: $tokenizer_self, $current_package
# %is_constant,
use constant HERE_DOC_WINDOW => 40;
my $next_token = shift;
my $here_doc_expected = 0;
my $line;
my $k = 0;
my $msg = "checking <<";
while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
{
chomp $line;
if ( $line =~ /^$next_token$/ ) {
$msg .= " -- found target $next_token ahead $k lines\n";
$here_doc_expected = 1; # got it
last;
}
last if ( $k >= HERE_DOC_WINDOW );
}
unless ($here_doc_expected) {
if ( !defined($line) ) {
$here_doc_expected = -1; # hit eof without seeing target
$msg .= " -- must be shift; target $next_token not in file\n";
}
else { # still unsure..taking a wild guess
if ( !$is_constant{$current_package}{$next_token} ) {
$here_doc_expected = 1;
$msg .=
" -- guessing it's a here-doc ($next_token not a constant)\n";
}
else {
$msg .=
" -- guessing it's a shift ($next_token is a constant)\n";
}
}
}
write_logfile_entry($msg);
return $here_doc_expected;
}
#########i#############################################################
# Tokenizer Routines for scanning identifiers and related items
#######################################################################
sub scan_bare_identifier_do {
# this routine is called to scan a token starting with an alphanumeric
# variable or package separator, :: or '.
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
# $last_nonblank_type,@paren_type, $paren_depth
my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
$max_token_index )
= @_;
my $i_begin = $i;
my $package = undef;
my $i_beg = $i;
# we have to back up one pretoken at a :: since each : is one pretoken
if ( $tok eq '::' ) { $i_beg-- }
if ( $tok eq '->' ) { $i_beg-- }
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
# Examples:
# A::B::C
# A::
# ::A
# A'B
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
$tok = substr( $input_line, $pos_beg, $numc );
# type 'w' includes anything without leading type info
# ($,%,@,*) including something like abc::def::ghi
$type = 'w';
my $sub_name = "";
if ( defined($2) ) { $sub_name = $2; }
if ( defined($1) ) {
$package = $1;
# patch: don't allow isolated package name which just ends
# in the old style package separator (single quote). Example:
# use CGI':all';
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
$pos--;
}
$package =~ s/\'/::/g;
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
$package =~ s/::$//;
}
else {
$package = $current_package;
if ( $is_keyword{$tok} ) {
$type = 'k';
}
}
# if it is a bareword..
if ( $type eq 'w' ) {
# check for v-string with leading 'v' type character
# (This seems to have presidence over filehandle, type 'Y')
if ( $tok =~ /^v\d[_\d]*$/ ) {
# we only have the first part - something like 'v101' -
# look for more
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
$pos = pos($input_line);
$numc = $pos - $pos_beg;
$tok = substr( $input_line, $pos_beg, $numc );
}
$type = 'v';
# warn if this version can't handle v-strings
report_v_string($tok);
}
elsif ( $is_constant{$package}{$sub_name} ) {
$type = 'C';
}
# bareword after sort has implied empty prototype; for example:
# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
# This has priority over whatever the user has specified.
elsif ($last_nonblank_token eq 'sort'
&& $last_nonblank_type eq 'k' )
{
$type = 'Z';
}
# Note: strangely, perl does not seem to really let you create
# functions which act like eval and do, in the sense that eval
# and do may have operators following the final }, but any operators
# that you create with prototype (&) apparently do not allow
# trailing operators, only terms. This seems strange.
# If this ever changes, here is the update
# to make perltidy behave accordingly:
# elsif ( $is_block_function{$package}{$tok} ) {
# $tok='eval'; # patch to do braces like eval - doesn't work
# $type = 'k';
#}
# FIXME: This could become a separate type to allow for different
# future behavior:
elsif ( $is_block_function{$package}{$sub_name} ) {
$type = 'G';
}
elsif ( $is_block_list_function{$package}{$sub_name} ) {
$type = 'G';
}
elsif ( $is_user_function{$package}{$sub_name} ) {
$type = 'U';
$prototype = $user_function_prototype{$package}{$sub_name};
}
# check for indirect object
elsif (
# added 2001-03-27: must not be followed immediately by '('
# see fhandle.t
( $input_line !~ m/\G\(/gc )
# and
&& (
# preceded by keyword like 'print', 'printf' and friends
$is_indirect_object_taker{$last_nonblank_token}
# or preceded by something like 'print(' or 'printf('
|| (
( $last_nonblank_token eq '(' )
&& $is_indirect_object_taker{ $paren_type[$paren_depth]
}
)
)
)
{
# may not be indirect object unless followed by a space
if ( $input_line =~ m/\G\s+/gc ) {
$type = 'Y';
# Abandon Hope ...
# Perl's indirect object notation is a very bad
# thing and can cause subtle bugs, especially for
# beginning programmers. And I haven't even been
# able to figure out a sane warning scheme which
# doesn't get in the way of good scripts.
# Complain if a filehandle has any lower case
# letters. This is suggested good practice.
# Use 'sub_name' because something like
# main::MYHANDLE is ok for filehandle
if ( $sub_name =~ /[a-z]/ ) {
# could be bug caused by older perltidy if
# followed by '('
if ( $input_line =~ m/\G\s*\(/gc ) {
complain(
"Caution: unknown word '$tok' in indirect object slot\n"
);
}
}
}
# bareword not followed by a space -- may not be filehandle
# (may be function call defined in a 'use' statement)
else {
$type = 'Z';
}
}
}
# Now we must convert back from character position
# to pre_token index.
# I don't think an error flag can occur here ..but who knows
my $error;
( $i, $error ) =
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) {
warning("scan_bare_identifier: Possibly invalid tokenization\n");
}
}
# no match but line not blank - could be syntax error
# perl will take '::' alone without complaint
else {
$type = 'w';
# change this warning to log message if it becomes annoying
warning("didn't find identifier after leading ::\n");
}
return ( $i, $tok, $type, $prototype );
}
sub scan_id_do {
# This is the new scanner and will eventually replace scan_identifier.
# Only type 'sub' and 'package' are implemented.
# Token types $ * % @ & -> are not yet implemented.
#
# Scan identifier following a type token.
# The type of call depends on $id_scan_state: $id_scan_state = ''
# for starting call, in which case $tok must be the token defining
# the type.
#
# If the type token is the last nonblank token on the line, a value
# of $id_scan_state = $tok is returned, indicating that further
# calls must be made to get the identifier. If the type token is
# not the last nonblank token on the line, the identifier is
# scanned and handled and a value of '' is returned.
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
# $statement_type, $tokenizer_self
my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
$max_token_index )
= @_;
my $type = '';
my ( $i_beg, $pos_beg );
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
#my ($a,$b,$c) = caller;
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
# on re-entry, start scanning at first token on the line
if ($id_scan_state) {
$i_beg = $i;
$type = '';
}
# on initial entry, start scanning just after type token
else {
$i_beg = $i + 1;
$id_scan_state = $tok;
$type = 't';
}
# find $i_beg = index of next nonblank token,
# and handle empty lines
my $blank_line = 0;
my $next_nonblank_token = $$rtokens[$i_beg];
if ( $i_beg > $max_token_index ) {
$blank_line = 1;
}
else {
# only a ' if ( $next_nonblank_token eq '#' ) {
unless ( $tok eq '$' ) {
$blank_line = 1;
}
}
if ( $next_nonblank_token =~ /^\s/ ) {
( $next_nonblank_token, $i_beg ) =
find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
$max_token_index );
if ( $next_nonblank_token =~ /(^ $blank_line = 1;
}
}
}
unless ($blank_line) {
if ( $id_scan_state eq 'sub' ) {
( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
$input_line, $i, $i_beg,
$tok, $type, $rtokens,
$rtoken_map, $id_scan_state, $max_token_index
);
}
elsif ( $id_scan_state eq 'package' ) {
( $i, $tok, $type ) =
do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
$rtoken_map, $max_token_index );
$id_scan_state = '';
}
else {
warning("invalid token in scan_id: $tok\n");
$id_scan_state = '';
}
}
if ( $id_scan_state && ( !defined($type) || !$type ) ) {
warning(
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
);
report_definite_bug();
}
TOKENIZER_DEBUG_FLAG_NSCAN && do {
print
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
};
return ( $i, $tok, $type, $id_scan_state );
}
sub check_prototype {
my ( $proto, $package, $subname ) = @_;
return unless ( defined($package) && defined($subname) );
if ( defined($proto) ) {
$proto =~ s/^\s*\(\s*//;
$proto =~ s/\s*\)$//;
if ($proto) {
$is_user_function{$package}{$subname} = 1;
$user_function_prototype{$package}{$subname} = "($proto)";
if ( $proto =~ /\&/ ) {
if ( $proto =~ /\&$/ ) {
$is_block_function{$package}{$subname} = 1;
}
elsif ( $proto !~ /\&$/ ) {
$is_block_list_function{$package}{$subname} = 1;
}
}
}
else {
$is_constant{$package}{$subname} = 1;
}
}
else {
$is_user_function{$package}{$subname} = 1;
}
}
sub do_scan_package {
my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
$max_token_index )
= @_;
my $package = undef;
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
$package = $1;
$package = ( defined($1) && $1 ) ? $1 : 'main';
$package =~ s/\'/::/g;
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
$package =~ s/::$//;
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
$type = 'i';
# Now we must convert back from character position
# to pre_token index.
# I don't think an error flag can occur here ..but ?
my $error;
( $i, $error ) =
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) { warning("Possibly invalid package\n") }
$current_package = $package;
my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token !~ /^[;\}]$/ ) {
warning(
"Unexpected '$next_nonblank_token' after package name '$tok'\n"
);
}
}
else {
$type = 'k';
}
return ( $i, $tok, $type );
}
sub scan_identifier_do {
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
$expecting )
= @_;
my $i_begin = $i;
my $type = '';
my $tok_begin = $$rtokens[$i_begin];
if ( $tok_begin eq ':' ) { $tok_begin = '::' }
my $id_scan_state_begin = $id_scan_state;
my $identifier_begin = $identifier;
my $tok = $tok_begin;
my $message = "";
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
my $saw_type;
my $allow_tick = ( $last_nonblank_token ne 'use' );
unless ($id_scan_state) {
$context = UNKNOWN_CONTEXT;
if ( $tok eq '>' ) {
$tok = '->';
$tok_begin = $tok;
}
$identifier = $tok;
if ( $tok eq '$' || $tok eq '*' ) {
$id_scan_state = '$';
$context = SCALAR_CONTEXT;
}
elsif ( $tok eq '%' || $tok eq '@' ) {
$id_scan_state = '$';
$context = LIST_CONTEXT;
}
elsif ( $tok eq '&' ) {
$id_scan_state = '&';
}
elsif ( $tok eq 'sub' or $tok eq 'package' ) {
$saw_alpha = 0; $id_scan_state = '$';
$identifier .= ' '; }
elsif ( $tok eq '::' ) {
$id_scan_state = 'A';
}
elsif ( $tok =~ /^[A-Za-z_]/ ) {
$id_scan_state = ':';
}
elsif ( $tok eq '->' ) {
$id_scan_state = '$';
}
else {
my ( $a, $b, $c ) = caller;
warning("Program Bug: scan_identifier given bad token = $tok \n");
warning(" called from sub $a line: $c\n");
report_definite_bug();
}
$saw_type = !$saw_alpha;
}
else {
$i--;
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
}
my $i_save = $i;
while ( $i < $max_token_index ) {
$i_save = $i unless ( $tok =~ /^\s*$/ );
$tok = $$rtokens[ ++$i ];
if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
$tok = '::';
$i++;
}
if ( $id_scan_state eq '$' ) {
if ( $tok eq '$' ) {
$identifier .= $tok;
if ( $i == $max_token_index ) {
$type = 'i';
$id_scan_state = '';
last;
}
}
elsif ( $tok =~ /^[A-Za-z_]/ ) { $saw_alpha = 1;
$id_scan_state = ':'; $identifier .= $tok;
}
elsif ( $tok eq "'" && $allow_tick ) { $saw_alpha = 1;
$id_scan_state = ':'; $identifier .= $tok;
}
elsif ( $tok =~ /^[0-9]/ ) { $saw_alpha = 1;
$id_scan_state = ':'; $identifier .= $tok;
}
elsif ( $tok eq '::' ) {
$id_scan_state = 'A';
$identifier .= $tok;
}
elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { $identifier .= $tok; }
elsif ( $tok eq '{' ) {
if ( $identifier eq '$'
&& $i + 2 <= $max_token_index
&& $$rtokens[ $i + 2 ] eq '}'
&& $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
{
my $next2 = $$rtokens[ $i + 2 ];
my $next1 = $$rtokens[ $i + 1 ];
$identifier .= $tok . $next1 . $next2;
$i += 2;
$id_scan_state = '';
last;
}
$id_scan_state = '';
if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
$i = $i_save;
last;
}
elsif ( $tok =~ /^\s*$/ ) {
if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
if ( length($identifier) > 1 ) {
$id_scan_state = '';
$i = $i_save;
$type = 'i'; last;
}
else {
if ( $identifier !~ /^[\@\$]$/ ) {
$message =
"Space in identifier, following $identifier\n";
}
}
}
}
elsif ( $tok eq '^' ) {
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
$id_scan_state = 'A';
my $next1 = $$rtokens[ $i + 1 ];
if ( $next1 eq ']' ) {
$i++;
$identifier .= $next1;
$id_scan_state = "";
last;
}
}
else {
$id_scan_state = '';
}
}
else {
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
$identifier .= $tok;
}
elsif ( $identifier eq '$#' ) {
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
elsif ( $tok =~ /^[\:\-\+]$/ ) {
$type = 'i';
$identifier .= $tok;
}
else {
$i = $i_save;
write_logfile_entry( 'Use of $# is deprecated' . "\n" );
}
}
elsif ( $identifier eq '$$' ) {
$i = $i_save;
if ( $tok eq '{' ) { $type = 't' }
else { $type = 'i' }
}
elsif ( $identifier eq '->' ) {
$i = $i_save;
}
else {
$i = $i_save;
if ( length($identifier) == 1 ) { $identifier = ''; }
}
$id_scan_state = '';
last;
}
}
elsif ( $id_scan_state eq '&' ) {
if ( $tok =~ /^[\$A-Za-z_]/ ) { $id_scan_state = ':'; $saw_alpha = 1;
$identifier .= $tok;
}
elsif ( $tok eq "'" && $allow_tick ) { $id_scan_state = ':'; $saw_alpha = 1;
$identifier .= $tok;
}
elsif ( $tok =~ /^[0-9]/ ) { $id_scan_state = ':'; $saw_alpha = 1;
$identifier .= $tok;
}
elsif ( $tok =~ /^\s*$/ ) { }
elsif ( $tok eq '::' ) { $id_scan_state = 'A'; $identifier .= $tok;
}
elsif ( $tok eq '{' ) {
if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
$i = $i_save;
$id_scan_state = '';
last;
}
else {
if ( $identifier eq '&' && $expecting ) {
$identifier .= $tok;
}
else {
$identifier = '';
$i = $i_save;
$type = '&';
}
$id_scan_state = '';
last;
}
}
elsif ( $id_scan_state eq 'A' ) {
if ( $tok =~ /^[A-Za-z_]/ ) { $identifier .= $tok;
$id_scan_state = ':'; $saw_alpha = 1;
}
elsif ( $tok eq "'" && $allow_tick ) {
$identifier .= $tok;
$id_scan_state = ':'; $saw_alpha = 1;
}
elsif ( $tok =~ /^[0-9]/ ) { $identifier .= $tok;
$id_scan_state = ':'; $saw_alpha = 1;
}
elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
$id_scan_state = '(';
$identifier .= $tok;
}
elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
$id_scan_state = ')';
$identifier .= $tok;
}
else {
$id_scan_state = '';
$i = $i_save;
last;
}
}
elsif ( $id_scan_state eq ':' ) {
if ( $tok eq '::' ) { $identifier .= $tok;
$id_scan_state = 'A'; }
elsif ( $tok =~ /^[A-Za-z_]/ ) { $identifier .= $tok;
$id_scan_state = ':'; $saw_alpha = 1;
}
elsif ( $tok =~ /^[0-9]/ ) { $identifier .= $tok;
$id_scan_state = ':'; $saw_alpha = 1;
}
elsif ( $tok eq "'" && $allow_tick ) {
if ( $is_keyword{$identifier} ) {
$id_scan_state = ''; $i = $i_save;
}
else {
$identifier .= $tok;
}
}
elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
$id_scan_state = '(';
$identifier .= $tok;
}
elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
$id_scan_state = ')';
$identifier .= $tok;
}
else {
$id_scan_state = ''; $i = $i_save;
last;
}
}
elsif ( $id_scan_state eq '(' ) {
if ( $tok eq '(' ) { $identifier .= $tok;
$id_scan_state = ')'; }
elsif ( $tok =~ /^\s*$/ ) { $identifier .= $tok;
}
else {
$id_scan_state = ''; $i = $i_save;
last;
}
}
elsif ( $id_scan_state eq ')' ) {
if ( $tok eq ')' ) { $identifier .= $tok;
$id_scan_state = ''; last;
}
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
$identifier .= $tok;
}
else { warning("Unexpected '$tok' while seeking end of prototype\n");
$identifier .= $tok;
}
}
else { $id_scan_state = '';
$i = $i_save;
last;
}
}
if ( $id_scan_state eq ')' ) {
warning("Hit end of line while seeking ) to end prototype\n");
}
if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
$id_scan_state = '';
}
if ( $i < 0 ) { $i = 0 }
unless ($type) {
if ($saw_type) {
if ($saw_alpha) {
if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
$type = 'w';
}
else { $type = 'i' }
}
elsif ( $identifier eq '->' ) {
$type = '->';
}
elsif (
( length($identifier) > 1 )
&& !( $identifier =~ /\$$/ && $tok eq '{' )
&& ( $identifier !~ /^(sub |package )$/ )
)
{
$type = 'i';
}
else { $type = 't' }
}
elsif ($saw_alpha) {
$type = 'w';
}
else {
$type = '';
} }
if ($identifier) {
$tok = $identifier;
if ($message) { write_logfile_entry($message) }
}
else {
$tok = $tok_begin;
$i = $i_begin;
}
TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
my ( $a, $b, $c ) = caller;
print
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
print
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
};
return ( $i, $tok, $type, $id_scan_state, $identifier );
}
{
my ( $package_saved, $subname_saved );
sub do_scan_sub {
my (
$input_line, $i, $i_beg,
$tok, $type, $rtokens,
$rtoken_map, $id_scan_state, $max_token_index
) = @_;
$id_scan_state = ""; my $subname = undef;
my $package = undef;
my $proto = undef;
my $attrs = undef;
my $match;
my $pos_beg = $$rtoken_map[$i_beg];
pos($input_line) = $pos_beg;
if (
$input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) (\s*\([^){]*\))? (\s*:)? /gcx
)
{
$match = 1;
$subname = $2;
$proto = $3;
$attrs = $4;
$package = ( defined($1) && $1 ) ? $1 : $current_package;
$package =~ s/\'/::/g;
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
$package =~ s/::$//;
my $pos = pos($input_line);
my $numc = $pos - $pos_beg;
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
$type = 'i';
}
elsif (
$input_line =~ m/\G(\s*\([^){]*\))? (\s*:)? /gcx
&& ( $1 || $2 )
)
{
$match = 1;
$proto = $1;
$attrs = $2;
if ($subname_saved) {
$package = $package_saved;
$subname = $subname_saved;
$tok = $last_nonblank_token;
}
$type = 'i';
}
if ($match) {
my $pos = pos($input_line);
if ($attrs) {
$pos -= length($attrs);
}
my $next_nonblank_token = $tok;
if ( $pos == $pos_beg && $tok eq ':' ) {
$type = 'A';
$in_attribute_list = 1;
}
else {
my $error;
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
$max_token_index );
if ($error) { warning("Possibly invalid sub\n") }
( $next_nonblank_token, my $i_next ) =
find_next_nonblank_token_on_this_line( $i, $rtokens,
$max_token_index );
}
if ( $next_nonblank_token =~ /^(\s*| { my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(1);
if ( defined($rpre_tokens) && @$rpre_tokens ) {
$next_nonblank_token = $rpre_tokens->[0];
}
else {
$next_nonblank_token = '}';
}
}
$package_saved = "";
$subname_saved = "";
if ( $next_nonblank_token eq '{' ) {
if ($subname) {
if ( $saw_function_definition{$package}{$subname}
&& $subname !~ /^[A-Z]+$/ )
{
my $lno = $saw_function_definition{$package}{$subname};
warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
);
}
$saw_function_definition{$package}{$subname} =
$tokenizer_self->{_last_line_number};
}
}
elsif ( $next_nonblank_token eq ';' ) {
}
elsif ( $next_nonblank_token eq '}' ) {
}
elsif ( $next_nonblank_token eq ':' ) {
$statement_type = $tok;
}
elsif ( $next_nonblank_token eq '(' ) {
if ( $attrs || $proto ) {
warning(
"unexpected '(' after definition or declaration of sub '$subname'\n"
);
}
else {
$id_scan_state = 'sub'; $statement_type = $tok;
$package_saved = $package;
$subname_saved = $subname;
}
}
elsif ($next_nonblank_token) { warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
);
}
check_prototype( $proto, $package, $subname );
}
else {
}
return ( $i, $tok, $type, $id_scan_state );
}
}
sub find_next_nonblank_token {
my ( $i, $rtokens, $max_token_index ) = @_;
if ( $i >= $max_token_index ) {
if ( !peeked_ahead() ) {
peeked_ahead(1);
$rtokens =
peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
}
}
my $next_nonblank_token = $$rtokens[ ++$i ];
if ( $next_nonblank_token =~ /^\s*$/ ) {
$next_nonblank_token = $$rtokens[ ++$i ];
}
return ( $next_nonblank_token, $i );
}
sub numerator_expected {
my ( $i, $rtokens, $max_token_index ) = @_;
my $next_token = $$rtokens[ $i + 1 ];
if ( $next_token eq '=' ) { $i++; } my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
1;
}
else {
if ( $next_nonblank_token =~ /^\s*$/ ) {
0;
}
else {
-1;
}
}
}
sub pattern_expected {
my ( $i, $rtokens, $max_token_index ) = @_;
my $next_token = $$rtokens[ $i + 1 ];
if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
{
1;
}
else {
if ( $next_nonblank_token =~ /^\s*$/ ) {
0;
}
else {
-1;
}
}
}
sub find_next_nonblank_token_on_this_line {
my ( $i, $rtokens, $max_token_index ) = @_;
my $next_nonblank_token;
if ( $i < $max_token_index ) {
$next_nonblank_token = $$rtokens[ ++$i ];
if ( $next_nonblank_token =~ /^\s*$/ ) {
if ( $i < $max_token_index ) {
$next_nonblank_token = $$rtokens[ ++$i ];
}
}
}
else {
$next_nonblank_token = "";
}
return ( $next_nonblank_token, $i );
}
sub find_angle_operator_termination {
my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
my $i = $i_beg;
my $type = '<';
pos($input_line) = 1 + $$rtoken_map[$i];
my $filter;
if ( $expecting == TERM ) { $filter = '[\>]' }
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
else { warning("Program Bug in find_angle_operator_termination\n") }
if ( $input_line =~ /($filter)/g ) {
if ( $1 eq '>' ) {
my $pos = pos($input_line);
my $pos_beg = $$rtoken_map[$i];
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
if ( $expecting eq UNKNOWN ) {
my $check = substr( $input_line, $pos - 2, 1 );
if ( $check eq '-' ) {
return ( $i, $type );
}
}
$type = 'Q';
my $error;
( $i, $error ) =
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) {
warning(
"Possible tokinization error..please check this line\n");
report_possible_bug();
}
if ( $expecting == TERM ) {
}
elsif ( $i <= $i_beg + 3 ) {
write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
}
else {
my $br = 0;
while ( $str =~ /\{/g ) { $br++ }
while ( $str =~ /\}/g ) { $br-- }
my $sb = 0;
while ( $str =~ /\[/g ) { $sb++ }
while ( $str =~ /\]/g ) { $sb-- }
my $pr = 0;
while ( $str =~ /\(/g ) { $pr++ }
while ( $str =~ /\)/g ) { $pr-- }
if ( $br || $sb || $pr ) {
$i = $i_beg;
$type = '<';
write_diagnostics(
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
}
else {
write_diagnostics(
"ANGLE-Guessing yes: $str expecting=$expecting\n");
write_logfile_entry("Guessing angle operator here: $str\n");
}
}
}
else {
if ( $expecting == TERM ) {
warning("No ending > for angle operator\n");
}
}
}
return ( $i, $type );
}
sub scan_number_do {
my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
my $pos_beg = $$rtoken_map[$i];
my $pos;
my $i_begin = $i;
my $number = undef;
my $type = $input_type;
my $first_char = substr( $input_line, $pos_beg, 1 );
if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
warning("Program bug - scan_number given character $first_char\n");
report_definite_bug();
return ( $i, $type, $number );
}
pos($input_line) = $pos_beg;
if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
$pos = pos($input_line);
my $numc = $pos - $pos_beg;
$number = substr( $input_line, $pos_beg, $numc );
$type = 'v';
report_v_string($number);
}
if ( !defined($number) ) {
pos($input_line) = $pos_beg;
if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
{
$pos = pos($input_line);
my $numc = $pos - $pos_beg;
$number = substr( $input_line, $pos_beg, $numc );
$type = 'n';
}
}
if ( !defined($number) ) {
pos($input_line) = $pos_beg;
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
$pos = pos($input_line);
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
{
$pos--;
}
my $numc = $pos - $pos_beg;
$number = substr( $input_line, $pos_beg, $numc );
$type = 'n';
}
}
if (
$number !~ /\d/ || ( $number =~ /^(.*)[eE]/
&& $1 !~ /\d/ ) )
{
$number = undef;
$type = $input_type;
return ( $i, $type, $number );
}
my $error;
( $i, $error ) =
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
if ($error) { warning("Possibly invalid number\n") }
return ( $i, $type, $number );
}
sub inverse_pretoken_map {
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
my $error = 0;
while ( ++$i <= $max_token_index ) {
if ( $pos <= $$rtoken_map[$i] ) {
if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
$i--;
last;
}
}
return ( $i, $error );
}
sub find_here_doc {
my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $ibeg = $i;
my $found_target = 0;
my $here_doc_target = '';
my $here_quote_character = '';
my $saw_error = 0;
my ( $next_nonblank_token, $i_next_nonblank, $next_token );
$next_token = $$rtokens[ $i + 1 ];
my $backslash = 0;
if ( $next_token eq '\\' ) {
$backslash = 1;
$next_token = $$rtokens[ $i + 2 ];
}
( $next_nonblank_token, $i_next_nonblank ) =
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
my $in_quote = 1;
my $quote_depth = 0;
my $quote_pos = 0;
my $quoted_string;
(
$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
$quoted_string
)
= follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
$here_quote_character, $quote_pos, $quote_depth, $max_token_index );
if ($in_quote) { $i = $ibeg;
if ( $expecting == TERM ) {
warning(
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
);
$saw_error = 1;
}
}
else { my $j;
$found_target = 1;
my $tokj;
for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
$tokj = $$rtokens[$j];
next
if ( $tokj eq "\\"
&& $j < $i - 1
&& $$rtokens[ $j + 1 ] eq $here_quote_character );
$here_doc_target .= $tokj;
}
}
}
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
$found_target = 1;
write_logfile_entry(
"found blank here-target after <<; suggest using \"\"\n");
$i = $ibeg;
}
elsif ( $next_token =~ /^\w/ ) {
my $here_doc_expected;
if ( $expecting == UNKNOWN ) {
$here_doc_expected = guess_if_here_doc($next_token);
}
else {
$here_doc_expected = 1;
}
if ($here_doc_expected) {
$found_target = 1;
$here_doc_target = $next_token;
$i = $ibeg + 1;
}
}
else {
if ( $expecting == TERM ) {
$found_target = 1;
write_logfile_entry("Note: bare here-doc operator <<\n");
}
else {
$i = $ibeg;
}
}
if ( $found_target && $backslash ) { $i++ }
return ( $found_target, $here_doc_target, $here_quote_character, $i,
$saw_error );
}
sub do_quote {
my (
$i, $in_quote, $quote_character,
$quote_pos, $quote_depth, $quoted_string_1,
$quoted_string_2, $rtokens, $rtoken_map,
$max_token_index
) = @_;
my $in_quote_starting = $in_quote;
my $quoted_string;
if ( $in_quote == 2 ) { my $ibeg = $i;
(
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string
)
= follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
$quote_pos, $quote_depth, $max_token_index );
$quoted_string_2 .= $quoted_string;
if ( $in_quote == 1 ) {
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
$quote_character = '';
}
else {
$quoted_string_2 .= "\n";
}
}
if ( $in_quote == 1 ) { my $ibeg = $i;
(
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string
)
= follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
$quote_pos, $quote_depth, $max_token_index );
$quoted_string_1 .= $quoted_string;
if ( $in_quote == 1 ) {
$quoted_string_1 .= "\n";
}
}
return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string_1, $quoted_string_2 );
}
sub follow_quoted_string {
my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
$max_token_index )
= @_;
my ( $tok, $end_tok );
my $i = $i_beg - 1;
my $quoted_string = "";
TOKENIZER_DEBUG_FLAG_QUOTE && do {
print
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
};
if ( $beginning_tok !~ /^\s*$/ ) {
$end_tok = matching_end_token($beginning_tok);
}
else {
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0;
while ( $i < $max_token_index ) {
$tok = $$rtokens[ ++$i ];
if ( $tok !~ /^\s*$/ ) {
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
$i = $max_token_index;
}
else {
if ( length($tok) > 1 ) {
if ( $quote_pos <= 0 ) { $quote_pos = 1 }
$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
}
else {
$beginning_tok = $tok;
$quote_pos = 0;
}
$end_tok = matching_end_token($beginning_tok);
$quote_depth = 1;
last;
}
}
else {
$allow_quote_comments = 1;
}
}
}
if ( $beginning_tok =~ /\w/ ) {
if ( $in_quote == 1 ) {
write_logfile_entry(
"Note: alphanumeric quote delimiter ($beginning_tok) \n");
}
while ( $i < $max_token_index ) {
if ( $quote_pos == 0 || ( $i < 0 ) ) {
$tok = $$rtokens[ ++$i ];
if ( $tok eq '\\' ) {
$quoted_string .= $tok
unless $$rtokens[ $i + 1 ] eq $end_tok;
$quote_pos++;
last if ( $i >= $max_token_index );
$tok = $$rtokens[ ++$i ];
}
}
my $old_pos = $quote_pos;
unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
{
}
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
if ( $quote_pos > 0 ) {
$quoted_string .=
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
$quote_depth--;
if ( $quote_depth == 0 ) {
$in_quote--;
last;
}
}
else {
$quoted_string .= substr( $tok, $old_pos );
}
}
}
else {
while ( $i < $max_token_index ) {
$tok = $$rtokens[ ++$i ];
if ( $tok eq $end_tok ) {
$quote_depth--;
if ( $quote_depth == 0 ) {
$in_quote--;
last;
}
}
elsif ( $tok eq $beginning_tok ) {
$quote_depth++;
}
elsif ( $tok eq '\\' ) {
$tok = $$rtokens[ ++$i ];
$quoted_string .= '\\'
unless ( $tok eq $end_tok || $tok eq $beginning_tok );
}
$quoted_string .= $tok;
}
}
if ( $i > $max_token_index ) { $i = $max_token_index }
return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
$quoted_string );
}
sub indicate_error {
my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
interrupt_logfile();
warning($msg);
write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
resume_logfile();
}
sub write_error_indicator_pair {
my ( $line_number, $input_line, $pos, $carrat ) = @_;
my ( $offset, $numbered_line, $underline ) =
make_numbered_line( $line_number, $input_line, $pos );
$underline = write_on_underline( $underline, $pos - $offset, $carrat );
warning( $numbered_line . "\n" );
$underline =~ s/\s*$//;
warning( $underline . "\n" );
}
sub make_numbered_line {
my ( $lineno, $str, $pos ) = @_;
my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
my $excess = length($str) - $offset - 68;
my $numc = ( $excess > 0 ) ? 68 : undef;
if ( defined($numc) ) {
if ( $offset == 0 ) {
$str = substr( $str, $offset, $numc - 4 ) . " ...";
}
else {
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
}
}
else {
if ( $offset == 0 ) {
}
else {
$str = "... " . substr( $str, $offset + 4 );
}
}
my $numbered_line = sprintf( "%d: ", $lineno );
$offset -= length($numbered_line);
$numbered_line .= $str;
my $underline = " " x length($numbered_line);
return ( $offset, $numbered_line, $underline );
}
sub write_on_underline {
my ( $underline, $pos, $pos_chr ) = @_;
unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
return $underline;
}
my $excess = length($pos_chr) + $pos - length($underline);
if ( $excess > 0 ) {
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
}
substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
return ($underline);
}
sub pre_tokenize {
my ( $str, $max_tokens_wanted ) = @_;
my @tokens = (); my @token_map = (0); my @type = ();
do {
if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
else {
return ( \@tokens, \@token_map, \@type );
}
push @tokens, $1;
push @token_map, pos($str);
} while ( --$max_tokens_wanted != 0 );
return ( \@tokens, \@token_map, \@type );
}
sub show_tokens {
my ( $rtokens, $rtoken_map ) = @_;
my $num = scalar(@$rtokens);
my $i;
for ( $i = 0 ; $i < $num ; $i++ ) {
my $len = length( $$rtokens[$i] );
print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
}
}
sub matching_end_token {
my $beginning_token = shift;
if ( $beginning_token eq '{' ) {
'}';
}
elsif ( $beginning_token eq '[' ) {
']';
}
elsif ( $beginning_token eq '<' ) {
'>';
}
elsif ( $beginning_token eq '(' ) {
')';
}
else {
$beginning_token;
}
}
sub dump_token_types {
my $class = shift;
my $fh = shift;
print $fh <<'END_OF_LIST';
Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
( ) <= >= == =~ !~ != ++ -- /= x=
... **= <<= >>= &&= ||= //= <=>
, + - / * | % ! x ~ = \ ? : . < > ^ &
The following additional token types are defined:
type meaning
b blank (white space)
{ indent: opening structural curly brace or square bracket or paren
(code block, anonymous hash reference, or anonymous array reference)
} outdent: right structural curly brace or square bracket or paren
[ left non-structural square bracket (enclosing an array index)
] right non-structural square bracket
( left non-structural paren (all but a list right of an =)
) right non-structural parena
L left non-structural curly brace (enclosing a key)
R right non-structural curly brace
; terminal semicolon
f indicates a semicolon in a "for" statement
h here_doc operator <<
Q indicates a quote or pattern
q indicates a qw quote block
k a perl keyword
C user-defined constant or constant function (with void prototype = ())
U user-defined function taking parameters
G user-defined function taking block parameter (like grep/map/eval)
M (unused, but reserved for subroutine definition name)
P (unused, but -html uses it to label pod text)
t type indicater such as %,$,@,*,&,sub
w bare word (perhaps a subroutine call)
i identifier of some type (with leading %, $, @, *, &, sub, -> )
n a number
v a v-string
F a file test operator (like -e)
Y File handle
Z identifier in indirect object slot: may be file handle, object
J LABEL: code block label
j LABEL after next, last, redo, goto
p unary +
m unary -
pp pre-increment operator ++
mm pre-decrement operator --
A : used as attribute separator
Here are the '_line_type' codes used internally:
SYSTEM - system-specific code before hash-bang line
CODE - line of perl code (including comments)
POD_START - line starting pod, such as '=head'
POD - pod documentation text
POD_END - last line of pod section, '=cut'
HERE - text of here-document
HERE_END - last line of here-doc (target word)
FORMAT - format section
FORMAT_END - last line of format section, '.'
DATA_START - __DATA__ line
DATA - unidentified text following __DATA__
END_START - __END__ line
END - unidentified text following __END__
ERROR - we are in big trouble, probably not a perl script
END_OF_LIST
}
BEGIN {
@opening_brace_names = qw @closing_brace_names = qw
my @digraphs = qw(
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
<= >= == =~ !~ != ++ -- /= x= ~~
);
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
my @valid_token_types = qw A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
push( @valid_token_types, @digraphs );
push( @valid_token_types, @trigraphs );
push( @valid_token_types, '#' );
push( @valid_token_types, ',' );
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
my @file_test_operators =
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
@is_file_test_operator{@file_test_operators} =
(1) x scalar(@file_test_operators);
@_ = qw( do eval );
@is_block_operator{@_} = (1) x scalar(@_);
@_ = qw( print printf sort exec system say);
@is_indirect_object_taker{@_} = (1) x scalar(@_);
@_ =
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
unless do while until eval for foreach map grep sort
switch case given when);
@is_code_block_token{@_} = (1) x scalar(@_);
my @Keywords = ();
my @value_requestor = qw(
AUTOLOAD
BEGIN
CHECK
DESTROY
END
EQ
GE
GT
INIT
LE
LT
NE
UNITCHECK
abs
accept
alarm
and
atan2
bind
binmode
bless
break
caller
chdir
chmod
chomp
chop
chown
chr
chroot
close
closedir
cmp
connect
continue
cos
crypt
dbmclose
dbmopen
defined
delete
die
dump
each
else
elsif
eof
eq
exec
exists
exit
exp
fcntl
fileno
flock
for
foreach
formline
ge
getc
getgrgid
getgrnam
gethostbyaddr
gethostbyname
getnetbyaddr
getnetbyname
getpeername
getpgrp
getpriority
getprotobyname
getprotobynumber
getpwnam
getpwuid
getservbyname
getservbyport
getsockname
getsockopt
glob
gmtime
goto
grep
gt
hex
if
index
int
ioctl
join
keys
kill
last
lc
lcfirst
le
length
link
listen
local
localtime
lock
log
lstat
lt
map
mkdir
msgctl
msgget
msgrcv
msgsnd
my
ne
next
no
not
oct
open
opendir
or
ord
our
pack
pipe
pop
pos
print
printf
prototype
push
quotemeta
rand
read
readdir
readlink
readline
readpipe
recv
redo
ref
rename
require
reset
return
reverse
rewinddir
rindex
rmdir
scalar
seek
seekdir
select
semctl
semget
semop
send
sethostent
setnetent
setpgrp
setpriority
setprotoent
setservent
setsockopt
shift
shmctl
shmget
shmread
shmwrite
shutdown
sin
sleep
socket
socketpair
sort
splice
split
sprintf
sqrt
srand
stat
study
substr
symlink
syscall
sysopen
sysread
sysseek
system
syswrite
tell
telldir
tie
tied
truncate
uc
ucfirst
umask
undef
unless
unlink
unpack
unshift
untie
until
use
utime
values
vec
waitpid
warn
while
write
xor
switch
case
given
when
err
say
);
push( @Keywords, @value_requestor );
my @extra_vr = qw(
constant
vars
);
push( @value_requestor, @extra_vr );
@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
my @operator_requestor = qw(
endgrent
endhostent
endnetent
endprotoent
endpwent
endservent
fork
getgrent
gethostent
getlogin
getnetent
getppid
getprotoent
getpwent
getservent
setgrent
setpwent
time
times
wait
wantarray
);
push( @Keywords, @operator_requestor );
my @extra_or = qw(
STDERR
STDIN
STDOUT
);
push( @operator_requestor, @extra_or );
@expecting_operator_token{@operator_requestor} =
(1) x scalar(@operator_requestor);
my @operator_requestor_types = qw( ++ -- C <> q );
@expecting_operator_types{@operator_requestor_types} =
(1) x scalar(@operator_requestor_types);
my @value_requestor_type = qw L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
f F pp mm Y p m U J G j >> << ^ t
push( @value_requestor_type, ',' )
; @expecting_term_types{@value_requestor_type} =
(1) x scalar(@value_requestor_type);
%really_want_term = %expecting_term_types;
delete $really_want_term{'U'}; delete $really_want_term{'F'}; delete $really_want_term{'Y'};
@_ = qw(q qq qw qx qr s y tr m);
@is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
my @special_keywords = qw(
do
eval
format
m
package
q
qq
qr
qw
qx
s
sub
tr
y
);
push( @Keywords, @special_keywords );
my @keyword_taking_list = qw(
and
chmod
chomp
chop
chown
dbmopen
die
elsif
exec
fcntl
for
foreach
formline
getsockopt
if
index
ioctl
join
kill
local
msgctl
msgrcv
msgsnd
my
open
or
our
pack
print
printf
push
read
readpipe
recv
return
reverse
rindex
seek
select
semctl
semget
send
setpriority
setsockopt
shmctl
shmget
shmread
shmwrite
socket
socketpair
sort
splice
split
sprintf
substr
syscall
sysopen
sysread
sysseek
system
syswrite
tie
unless
unlink
unpack
unshift
until
vec
warn
while
);
@is_keyword_taking_list{@keyword_taking_list} =
(1) x scalar(@keyword_taking_list);
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
}
1;
__END__
=head1 NAME
Perl::Tidy - Parses and beautifies perl source
=head1 SYNOPSIS
use Perl::Tidy;
Perl::Tidy::perltidy(
source => $source,
destination => $destination,
stderr => $stderr,
argv => $argv,
perltidyrc => $perltidyrc,
logfile => $logfile,
errorfile => $errorfile,
formatter => $formatter, # callback object (see below)
dump_options => $dump_options,
dump_options_type => $dump_options_type,
);
=head1 DESCRIPTION
This module makes the functionality of the perltidy utility available to perl
scripts. Any or all of the input parameters may be omitted, in which case the
@ARGV array will be used to provide input parameters as described
in the perltidy(1) man page.
For example, the perltidy script is basically just this:
use Perl::Tidy;
Perl::Tidy::perltidy();
The module accepts input and output streams by a variety of methods.
The following list of parameters may be any of a the following: a
filename, an ARRAY reference, a SCALAR reference, or an object with
either a B<getline> or B<print> method, as appropriate.
source - the source of the script to be formatted
destination - the destination of the formatted output
stderr - standard error output
perltidyrc - the .perltidyrc file
logfile - the .LOG file stream, if any
errorfile - the .ERR file stream, if any
dump_options - ref to a hash to receive parameters (see below),
dump_options_type - controls contents of dump_options
dump_getopt_flags - ref to a hash to receive Getopt flags
dump_options_category - ref to a hash giving category of options
dump_abbreviations - ref to a hash giving all abbreviations
The following chart illustrates the logic used to decide how to
treat a parameter.
ref($param) $param is assumed to be:
----------- ---------------------
undef a filename
SCALAR ref to string
ARRAY ref to array
(other) object with getline (if source) or print method
If the parameter is an object, and the object has a B<close> method, that
close method will be called at the end of the stream.
=over 4
=item source
If the B<source> parameter is given, it defines the source of the
input stream.
=item destination
If the B<destination> parameter is given, it will be used to define the
file or memory location to receive output of perltidy.
=item stderr
The B<stderr> parameter allows the calling program to capture the output
to what would otherwise go to the standard error output device.
=item perltidyrc
If the B<perltidyrc> file is given, it will be used instead of any
F<.perltidyrc> configuration file that would otherwise be used.
=item argv
If the B<argv> parameter is given, it will be used instead of the
B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
string, or a reference to an array. If it is a string or reference to a
string, it will be parsed into an array of items just as if it were a
command line string.
=item dump_options
If the B<dump_options> parameter is given, it must be the reference to a hash.
In this case, the parameters contained in any perltidyrc configuration file
will be placed in this hash and perltidy will return immediately. This is
equivalent to running perltidy with --dump-options, except that the perameters
are returned in a hash rather than dumped to standard output. Also, by default
only the parameters in the perltidyrc file are returned, but this can be
changed (see the next parameter). This parameter provides a convenient method
for external programs to read a perltidyrc file. An example program using
this feature, F<perltidyrc_dump.pl>, is included in the distribution.
Any combination of the B<dump_> parameters may be used together.
=item dump_options_type
This parameter is a string which can be used to control the parameters placed
in the hash reference supplied by B<dump_options>. The possible values are
'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
default options plus any options found in a perltidyrc file to be returned.
=item dump_getopt_flags
If the B<dump_getopt_flags> parameter is given, it must be the reference to a
hash. This hash will receive all of the parameters that perltidy understands
and flags that are passed to Getopt::Long. This parameter may be
used alone or with the B<dump_options> flag. Perltidy will
exit immediately after filling this hash. See the demo program
F<perltidyrc_dump.pl> for example usage.
=item dump_options_category
If the B<dump_options_category> parameter is given, it must be the reference to a
hash. This hash will receive a hash with keys equal to all long parameter names
and values equal to the title of the corresponding section of the perltidy manual.
See the demo program F<perltidyrc_dump.pl> for example usage.
=item dump_abbreviations
If the B<dump_abbreviations> parameter is given, it must be the reference to a
hash. This hash will receive all abbreviations used by Perl::Tidy. See the
demo program F<perltidyrc_dump.pl> for example usage.
=back
=head1 EXAMPLE
The following example passes perltidy a snippet as a reference
to a string and receives the result back in a reference to
an array.
use Perl::Tidy;
# some messy source code to format
my $source = <<'EOM';
use strict;
my @editors=('Emacs', 'Vi '); my $rand = rand();
print "A poll of 10 random programmers gave these results:\n";
foreach(0..10) {
my $i=int ($rand+rand());
print " $editors[$i] users are from Venus" . ", " .
"$editors[1-$i] users are from Mars" .
"\n";
}
EOM
# We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
my @dest;
perltidy( source => \$source, destination => \@dest );
foreach (@dest) {print}
=head1 Using the B<formatter> Callback Object
The B<formatter> parameter is an optional callback object which allows
the calling program to receive tokenized lines directly from perltidy for
further specialized processing. When this parameter is used, the two
formatting options which are built into perltidy (beautification or
html) are ignored. The following diagram illustrates the logical flow:
|-- (normal route) -> code beautification
caller->perltidy->|-- (-html flag ) -> create html
|-- (formatter given)-> callback to write_line
This can be useful for processing perl scripts in some way. The
parameter C<$formatter> in the perltidy call,
formatter => $formatter,
is an object created by the caller with a C<write_line> method which
will accept and process tokenized lines, one line per call. Here is
a simple example of a C<write_line> which merely prints the line number,
the line type (as determined by perltidy), and the text of the line:
sub write_line {
# This is called from perltidy line-by-line
my $self = shift;
my $line_of_tokens = shift;
my $line_type = $line_of_tokens->{_line_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $input_line = $line_of_tokens->{_line_text};
print "$input_line_number:$line_type:$input_line";
}
The complete program, B<perllinetype>, is contained in the examples section of
the source distribution. As this example shows, the callback method
receives a parameter B<$line_of_tokens>, which is a reference to a hash
of other useful information. This example uses these hash entries:
$line_of_tokens->{_line_number} - the line number (1,2,...)
$line_of_tokens->{_line_text} - the text of the line
$line_of_tokens->{_line_type} - the type of the line, one of:
SYSTEM - system-specific code before hash-bang line
CODE - line of perl code (including comments)
POD_START - line starting pod, such as '=head'
POD - pod documentation text
POD_END - last line of pod section, '=cut'
HERE - text of here-document
HERE_END - last line of here-doc (target word)
FORMAT - format section
FORMAT_END - last line of format section, '.'
DATA_START - __DATA__ line
DATA - unidentified text following __DATA__
END_START - __END__ line
END - unidentified text following __END__
ERROR - we are in big trouble, probably not a perl script
Most applications will be only interested in lines of type B<CODE>. For
another example, let's write a program which checks for one of the
so-called I<naughty matching variables> C<&`>, C<$&>, and C<$'>, which
can slow down processing. Here is a B<write_line>, from the example
program B<find_naughty.pl>, which does that:
sub write_line {
# This is called back from perltidy line-by-line
# We're looking for $`, $&, and $'
my ( $self, $line_of_tokens ) = @_;
# pull out some stuff we might need
my $line_type = $line_of_tokens->{_line_type};
my $input_line_number = $line_of_tokens->{_line_number};
my $input_line = $line_of_tokens->{_line_text};
my $rtoken_type = $line_of_tokens->{_rtoken_type};
my $rtokens = $line_of_tokens->{_rtokens};
chomp $input_line;
# skip comments, pod, etc
return if ( $line_type ne 'CODE' );
# loop over tokens looking for $`, $&, and $'
for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
# we only want to examine token types 'i' (identifier)
next unless $$rtoken_type[$j] eq 'i';
# pull out the actual token text
my $token = $$rtokens[$j];
# and check it
if ( $token =~ /^\$[\`\&\']$/ ) {
print STDERR
"$input_line_number: $token\n";
}
}
}
This example pulls out these tokenization variables from the $line_of_tokens
hash reference:
$rtoken_type = $line_of_tokens->{_rtoken_type};
$rtokens = $line_of_tokens->{_rtokens};
The variable C<$rtoken_type> is a reference to an array of token type codes,
and C<$rtokens> is a reference to a corresponding array of token text.
These are obviously only defined for lines of type B<CODE>.
Perltidy classifies tokens into types, and has a brief code for each type.
You can get a complete list at any time by running perltidy from the
command line with
perltidy --dump-token-types
In the present example, we are only looking for tokens of type B<i>
(identifiers), so the for loop skips past all other types. When an
identifier is found, its actual text is checked to see if it is one
being sought. If so, the above write_line prints the token and its
line number.
The B<formatter> feature is relatively new in perltidy, and further
documentation needs to be written to complete its description. However,
several example programs have been written and can be found in the
B<examples> section of the source distribution. Probably the best way
to get started is to find one of the examples which most closely matches
your application and start modifying it.
For help with perltidy's pecular way of breaking lines into tokens, you
might run, from the command line,
perltidy -D filename
where F<filename> is a short script of interest. This will produce
F<filename.DEBUG> with interleaved lines of text and their token types.
The B<-D> flag has been in perltidy from the beginning for this purpose.
If you want to see the code which creates this file, it is
C<write_debug_entry> in Tidy.pm.
=head1 EXPORT
&perltidy
=head1 CREDITS
Thanks to Hugh Myers who developed the initial modular interface
to perltidy.
=head1 VERSION
This man page documents Perl::Tidy version 20071205.
=head1 AUTHOR
Steve Hancock
perltidy at users.sourceforge.net
=head1 SEE ALSO
The perltidy(1) man page describes all of the features of perltidy. It
can be found at http://perltidy.sourceforge.net.
=cut