ParserState.pm   [plain text]


#! /usr/bin/perl -w
#
# Class name: 	ParserState
# Synopsis: 	Used by gatherHeaderDoc.pl to hold parser state
# Last Updated: $Date: 2009/03/30 19:38:51 $
# 
# Copyright (c) 1999-2004 Apple Computer, Inc.  All rights reserved.
#
# @APPLE_LICENSE_HEADER_START@
#
# This file contains Original Code and/or Modifications of Original Code
# as defined in and that are subject to the Apple Public Source License
# Version 2.0 (the 'License'). You may not use this file except in
# compliance with the License. Please obtain a copy of the License at
# http://www.opensource.apple.com/apsl/ and read it before using this
# file.
# 
# The Original Code and all software distributed under the License are
# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
# Please see the License for the specific language governing rights and
# limitations under the License.
#
# @APPLE_LICENSE_HEADER_END@
#
######################################################################
package HeaderDoc::ParserState;

use strict;
use vars qw($VERSION @ISA);
use HeaderDoc::Utilities qw(isKeyword quote stringToFields);

$HeaderDoc::ParserState::VERSION = '$Revision: 1.8 $';
################ General Constants ###################################
my $debugging = 0;

my $treeDebug = 0;

my $backslashDebug = 0;

my %defaults = (
	frozensodname => "",
	stackFrozen => 0, # set to prevent fake parsed params with inline funcs
	returntype => "",
	freezereturn => 0,       # set to prevent fake return types with inline funcs
	availability => "",      # holds availability string if we find an av macro.
	lang => "C",

	inComment => 0,
	inInlineComment => 0,
	inString => 0,
	inChar => 0,
	inTemplate => 0,
	inOperator => 0,
	inPrivateParamTypes => 0,  # after a colon in a C++ function declaration.
	onlyComments => 1,         # set to 0 to avoid switching to macro parse.
                                  # mode after we have seen a code token.
	inMacro => 0,
	inMacroLine => 0,          # for handling macros in middle of data types.
	seenMacroPart => 0,        # used to control dropping of macro body.
	macroNoTrunc => 1,         # used to avoid truncating body of macros
	inBrackets => 0,           # square brackets ([]).
    # $self->{inPType} = 0;              # in pascal types.
    # $self->{inRegexp} = 0;             # in perl regexp.
    # $self->{regexpNoInterpolate} = 0;  # Don't interpolate (e.g. tr)
    # $self->{inRegexpTrailer} = 0;      # in the cruft at the end of a regexp.
    # $self->{ppSkipOneToken} = 0;       # Comments are always dropped from parsed
                                  # parameter lists.  However, inComment goes
                                  # to 0 on the end-of-comment character.
                                  # This prevents the end-of-comment character
                                  # itself from being added....

    # $self->{lastsymbol} = "";          # Name of the last token, wiped by braces,
                                  # parens, etc.  This is not what you are
                                  # looking for.  It is used mostly for
                                  # handling names of typedefs.
	name => "",                # Name of a basic data type.
	callbackNamePending => 0,  # 1 if callback name could be here.  This is
                                  # only used for typedef'ed callbacks.  All
                                  # other callbacks get handled by the parameter
                                  # parsing code.  (If we get a second set of
                                  # parsed parameters for a function, the first
                                  # one becomes the callback name.)
	callbackName => "",        # Name of this callback.
	callbackIsTypedef => 0,    # 1 if the callback is wrapped in a typedef---
                                  # sets priority order of type matching (up
                                  # one level in headerdoc2HTML.pl).

	namePending => 0,          # 1 if name of func/variable is coming up.
	basetype => "",            # The main name for this data type.
	posstypes => "",           # List of type names for this data type.
	posstypesPending => 1,     # If this token could be one of the
                                  # type names of a typedef/struct/union/*
                                  # declaration, this should be 1.
	sodtype => "",             # 'start of declaration' type.
	sodname => "",             # 'start of declaration' name.
	sodclass => "",            # 'start of declaration' "class".  These
                                  # bits allow us keep track of functions and
                                  # callbacks, mostly, but not the name of a
                                  # callback.

	simpleTypedef => 0,        # High if it's a typedef w/o braces.
	simpleTDcontents => "",    # Guts of a one-line typedef.  Don't ask.
	seenBraces => 0,           # Goes high after initial brace for inline
                                  # functions and macros -only-.  We
                                  # essentially stop parsing at this point.
	kr_c_function => 0,        # Goes high if we see a K&R C declaration.
	kr_c_name => "",           # The name of a K&R function (which would
                                  # otherwise get lost).

    # $self->{lastchar} = "";            # Ends with the last token, but may be longer.
    # $self->{lastnspart} = "";          # The last non-whitespace token.
    # $self->{lasttoken} = "";           # The last token seen (though [\n\r] may be
                                  # replaced by a space in some cases).
	startOfDec => 1,           # Are we at the start of a declaration?
    # $self->{prespace} = 0;             # Used for indentation (deprecated).
    # $self->{prespaceadjust} = 0;       # Indentation is now handled by the parse
                                  # tree (colorizer) code.
    # $self->{scratch} = "";             # Scratch space.
    # $self->{curline} = "";             # The current line.  This is pushed onto
                                  # the declaration at a newline and when we
                                  # enter/leave certain constructs.  This is
                                  # deprecated in favor of the parse tree.
    # $self->{curstring} = "";           # The string we're currently processing.
    # $self->{continuation} = 0;         # An obscure spacing workaround.  Deprecated.
    # $self->{forcenobreak} = 0;         # An obscure spacing workaround.  Deprecated.
	occmethod => 0,            # 1 if we're in an ObjC method.
    # $self->{occspace} = 0;             # An obscure spacing workaround.  Deprecated.
	occmethodname => "",       # The name of an objective C method (which
                                  # gets augmented to be this:that:theother).
	preTemplateSymbol => "",   # The last symbol prior to the start of a
                                  # C++ template.  Used to determine whether
                                  # the type returned should be a function or
                                  # a function template.
	preEqualsSymbol => "",     # Used to get the name of a variable that
                                  # is followed by an equals sign.
	valuepending => 0,         # True if a value is pending, used to
                                  # return the right value.
	value => "",               # The current value.
	parsedParamParse => 0,
    # $self->{parsedParam} = "";         # The current parameter being parsed.
    # $self->{postPossNL} = 0;           # Used to force certain newlines to be added
                                  # to the parse tree (to end macros, etc.)
	categoryClass => "",
	classtype => "",
	inClass => 0,

	seenTilde => 0,          # set to 1 for C++ destructor.

	# parsedParamList => undef, # currently active parsed parameter list.
	# pplStack => undef, # stack of parsed parameter lists.  Used to handle
                       # fields and parameters in nested callbacks/structs.
	# freezeStack => undef, # copy of pplStack when frozen.

	initbsCount => 0,
	# hollow => undef,      # a spot in the tree to put stuff.
	noInsert => 0,
	bracePending => 0,	# set to 1 if lack of a brace would change
				# from being a struct/enum/union/typedef
				# to a variable.
	backslashcount => 0,

	functionReturnsCallback => 0

);

# print STDERR "DEFAULTS: startOfDec: ".$defaults{startOfDec}."\n";
# print STDERR "DEFAULTS: inClass: ".$defaults{inClass}."\n";

sub new {
    my($param) = shift;
    my($class) = ref($param) || $param;
    my %selfhash = %defaults;
    my $self = \%selfhash;

    # print STDERR "startOfDec: ".$self->{startOfDec}."\n";
    # print STDERR "startOfDecX: ".$defaults{startOfDec}."\n";

# print STDERR "CREATING NEW PARSER STATE!\n";
    
    bless($self, $class);
    $self->_initialize();
    # Now grab any key => value pairs passed in
    my (%attributeHash) = @_;
    foreach my $key (keys(%attributeHash)) {
        my $ucKey = uc($key);
        $self->{$ucKey} = $attributeHash{$key};
    }  
    return ($self);
}

sub _initialize {
    my($self) = shift;
    my @arr1 = ();
    my @arr2 = ();
    my @arr3 = ();

    $self->{parsedParamList} = \@arr1; # currently active parsed parameter list.
    $self->{pplStack} = \@arr2; # stack of parsed parameter lists.  Used to handle
                       # fields and parameters in nested callbacks/structs.
    $self->{freezeStack} = \@arr3; # copy of pplStack when frozen.

    my %orighash = %{$self};

    return;

    # my($self) = shift;

    $self->{frozensodname} = "";
    $self->{stackFrozen} = 0; # set to prevent fake parsed params with inline funcs
    $self->{returntype} = "";
    $self->{freezereturn} = 0;       # set to prevent fake return types with inline funcs
    $self->{availability} = "";      # holds availability string if we find an av macro.
    $self->{lang} = "C";

    $self->{inComment} = 0;
    $self->{inInlineComment} = 0;
    $self->{inString} = 0;
    $self->{inChar} = 0;
    $self->{inTemplate} = 0;
    $self->{inOperator} = 0;
    $self->{inPrivateParamTypes} = 0;  # after a colon in a C++ function declaration.
    $self->{onlyComments} = 1;         # set to 0 to avoid switching to macro parse.
                                  # mode after we have seen a code token.
    $self->{inMacro} = 0;
    $self->{inMacroLine} = 0;          # for handling macros in middle of data types.
    $self->{seenMacroPart} = 0;        # used to control dropping of macro body.
    $self->{macroNoTrunc} = 1;         # used to avoid truncating body of macros
    $self->{inBrackets} = 0;           # square brackets ([]).
    # $self->{inPType} = 0;              # in pascal types.
    # $self->{inRegexp} = 0;             # in perl regexp.
    # $self->{regexpNoInterpolate} = 0;  # Don't interpolate (e.g. tr)
    # $self->{inRegexpTrailer} = 0;      # in the cruft at the end of a regexp.
    # $self->{ppSkipOneToken} = 0;       # Comments are always dropped from parsed
                                  # parameter lists.  However, inComment goes
                                  # to 0 on the end-of-comment character.
                                  # This prevents the end-of-comment character
                                  # itself from being added....

    # $self->{lastsymbol} = "";          # Name of the last token, wiped by braces,
                                  # parens, etc.  This is not what you are
                                  # looking for.  It is used mostly for
                                  # handling names of typedefs.
    $self->{name} = "";                # Name of a basic data type.
    $self->{callbackNamePending} = 0;  # 1 if callback name could be here.  This is
                                  # only used for typedef'ed callbacks.  All
                                  # other callbacks get handled by the parameter
                                  # parsing code.  (If we get a second set of
                                  # parsed parameters for a function, the first
                                  # one becomes the callback name.)
    $self->{callbackName} = "";        # Name of this callback.
    $self->{callbackIsTypedef} = 0;    # 1 if the callback is wrapped in a typedef---
                                  # sets priority order of type matching (up
                                  # one level in headerdoc2HTML.pl).

    $self->{namePending} = 0;          # 1 if name of func/variable is coming up.
    $self->{basetype} = "";            # The main name for this data type.
    $self->{posstypes} = "";           # List of type names for this data type.
    $self->{posstypesPending} = 1;     # If this token could be one of the
                                  # type names of a typedef/struct/union/*
                                  # declaration, this should be 1.
    $self->{sodtype} = "";             # 'start of declaration' type.
    $self->{sodname} = "";             # 'start of declaration' name.
    $self->{sodclass} = "";            # 'start of declaration' "class".  These
                                  # bits allow us keep track of functions and
                                  # callbacks, mostly, but not the name of a
                                  # callback.

    $self->{simpleTypedef} = 0;        # High if it's a typedef w/o braces.
    $self->{simpleTDcontents} = "";    # Guts of a one-line typedef.  Don't ask.
    $self->{seenBraces} = 0;           # Goes high after initial brace for inline
                                  # functions and macros -only-.  We
                                  # essentially stop parsing at this point.
    $self->{kr_c_function} = 0;        # Goes high if we see a K&R C declaration.
    $self->{kr_c_name} = "";           # The name of a K&R function (which would
                                  # otherwise get lost).

    # $self->{lastchar} = "";            # Ends with the last token, but may be longer.
    # $self->{lastnspart} = "";          # The last non-whitespace token.
    # $self->{lasttoken} = "";           # The last token seen (though [\n\r] may be
                                  # replaced by a space in some cases.
    $self->{startOfDec} = 1;           # Are we at the start of a declaration?
    # $self->{prespace} = 0;             # Used for indentation (deprecated).
    # $self->{prespaceadjust} = 0;       # Indentation is now handled by the parse
                                  # tree (colorizer) code.
    # $self->{scratch} = "";             # Scratch space.
    # $self->{curline} = "";             # The current line.  This is pushed onto
                                  # the declaration at a newline and when we
                                  # enter/leave certain constructs.  This is
                                  # deprecated in favor of the parse tree.
    # $self->{curstring} = "";           # The string we're currently processing.
    # $self->{continuation} = 0;         # An obscure spacing workaround.  Deprecated.
    # $self->{forcenobreak} = 0;         # An obscure spacing workaround.  Deprecated.
    $self->{occmethod} = 0;            # 1 if we're in an ObjC method.
    # $self->{occspace} = 0;             # An obscure spacing workaround.  Deprecated.
    $self->{occmethodname} = "";       # The name of an objective C method (which
                                  # gets augmented to be this:that:theother).
    $self->{preTemplateSymbol} = "";   # The last symbol prior to the start of a
                                  # C++ template.  Used to determine whether
                                  # the type returned should be a function or
                                  # a function template.
    $self->{preEqualsSymbol} = "";     # Used to get the name of a variable that
                                  # is followed by an equals sign.
    $self->{valuepending} = 0;         # True if a value is pending, used to
                                  # return the right value.
    $self->{value} = "";               # The current value.
    $self->{parsedParamParse} => 0,
    # $self->{parsedParam} = "";         # The current parameter being parsed.
    # $self->{postPossNL} = 0;           # Used to force certain newlines to be added
                                  # to the parse tree (to end macros, etc.)
    $self->{categoryClass} = "";
    $self->{classtype} = "";
    $self->{inClass} = 0;

    $self->{seenTilde} = 0;          # set to 1 for C++ destructor.

    #my @emptylist = ();
    #$self->{parsedParamList} = \@emptylist; # currently active parsed parameter list.
    #my @emptylistb = ();
    #$self->{pplStack} = \@emptylistb; # stack of parsed parameter lists.  Used to handle
                       # fields and parameters in nested callbacks/structs.
    #my @emptylistc = ();
    #$self->{freezeStack} = \@emptylistc; # copy of pplStack when frozen.

    $self->{initbsCount} = 0;
    $self->{hollow} = undef;      # a spot in the tree to put stuff.
    $self->{noInsert} = 0;
    $self->{bracePending} = 0;	# set to 1 if lack of a brace would change
				# from being a struct/enum/union/typedef
				# to a variable.
    $self->{backslashcount} = 0;

    # foreach my $key (keys %{$self}) {
	# if ($self->{$key} != $orighash{$key}) {
		# print STDERR "HASH DIFFERS FOR KEY $key (".$self->{$key}." != ".$orighash{$key}.")\n";
	# } else {
		# print STDERR "Hash keys same for key $key\n";
	# }
    # }

    return $self;
}

# For consistency.
sub dbprint
{
    my $self = shift;
    return $self->print();
}

sub rollback
{
    my $self = shift;

    my $localDebug = 0;

    my $cloneref = $self->{rollbackState};
    my $clone = ${$cloneref};
    my %selfhash = %{$self};
    my %clonehash = %{$clone};

    if ($localDebug) {
	print STDERR "BEGIN PARSER STATE:\n";
	foreach my $key (keys(%clonehash)) {
		if ($self->{$key} ne $clone->{$key}) {
			print STDERR "$key: ".$self->{$key}." != ".$clone->{$key}."\n";
		}
	}
	print STDERR "END PARSER STATE\n";
    }
    foreach my $key (keys(%selfhash)) {
	# print STDERR "$key => $self->{$key}\n";
	$self->{$key} = undef;
    }
    foreach my $key (keys(%clonehash)) {
	$self->{$key} = $clone->{$key};
    }
    $self->{rollbackState} = undef;
}

sub rollbackSet
{
    my $self = shift;

    my $clone = HeaderDoc::ParserState->new();
    my %selfhash = %{$self};

    # print STDERR "BEGIN PARSER STATE:\n";
    foreach my $key (keys(%selfhash)) {
	# print STDERR "$key => $self->{$key}\n";
	$clone->{$key} = $self->{$key};
    }
    $self->{rollbackState} = \$clone;
    # print STDERR "END PARSER STATE\n";
}

sub print
{
    my $self = shift;
    my %selfhash = %{$self};

    print STDERR "BEGIN PARSER STATE:\n";
    foreach my $key (keys(%selfhash)) {
	print STDERR "$key => $self->{$key}\n";
    }
    print STDERR "END PARSER STATE\n";
}

sub resetBackslash
{
    my $self = shift;
    $self->{backslashcount}=0;

    print STDERR "RESET BACKSLASH. COUNT NOW ".$self->{backslashcount}."\n" if ($backslashDebug);
}

sub addBackslash
{
    my $self = shift;

    $self->{backslashcount}++;

    print STDERR "ADD BACKSLASH. COUNT NOW ".$self->{backslashcount}."\n" if ($backslashDebug);

}

sub isQuoted
{
    my $self = shift;
    my $lang = shift;
    my $sublang = shift;

    my $inSingle = $self->{inChar};
    my $inString = $self->{inString};
    my $count = $self->{backslashcount};

	print STDERR "LANG: $lang INSINGLE: $inSingle INSTRING: $inString\n" if ($backslashDebug);

    # Shell scripts treat single quotes as raw data.  Backslashes
    # inside are not treated as quote characters, so to put a single
    # quote, you have to put it inside a double quote contest, e.g.
    # "It's" or 'It'"'"'s'
    if ($inSingle && $lang eq "shell") {
	print STDERR "isQuoted: Shell script single quote backslash: not quoted.  Returning 0 (count is $count).\n" if ($backslashDebug);
	return 0;
    }

    # C shell scripts don't interpret \ within a string.
    if ($inString && $lang eq "shell" && $sublang eq "csh") {
	print STDERR "isQuoted: C Shell script backslash in double quotes: not quoted.  Returning 0 (count is $count).\n" if ($backslashDebug);
	return 0;
    }


    if ($count % 2) {
	print STDERR "isQuoted: Returning 1 (count is $count).\n" if ($backslashDebug);
	return 1;
    }
    print STDERR "isQuoted: Returning 0 (count is $count).\n" if ($backslashDebug);
    return 0;
}

1;