Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
443f21d
Add tests for PPI::Token::HereDoc.
guillaumeaubert Mar 21, 2014
e46845c
Fix parsing heredocs without a trailing newline.
guillaumeaubert Mar 21, 2014
1bae7c9
little bit of a readability improment in PPI::Token::HereDoc
wchristian Nov 2, 2014
b81d157
assume * after } is an operator, not a cast
wchristian Nov 6, 2014
2bc2fc4
Make the lexer recognize the implied end of a package statement that …
moregan Mar 25, 2014
e0c7a56
Issue #75 Prevent package names from being parsed as operators, etc.
moregan Mar 26, 2014
b95ecd9
Issue #76 Prevent package names like v10 from being version strings
moregan Mar 27, 2014
207c24b
Issue #65 Prevent sub names like v10 from being version strings
moregan Mar 27, 2014
e4640bb
Prevent 'use' and 'no' package names from being parsed as operators
moregan Mar 28, 2014
512c2ab
prevent left side of fat comma parsing as operator
moregan Mar 30, 2014
8c66b71
unit test all PPI::Statement::Sub methods
moregan Nov 12, 2014
deefad0
allow Token::Whitespace to throw useful error on unexpected input
wchristian Nov 13, 2014
0d016ce
tabify
moregan Nov 17, 2014
136b2cd
more test coverage and comments
moregan Nov 17, 2014
4e72b4f
fix GitHub #122: "x64" being parsed as x operator plus number
moregan Nov 17, 2014
6514873
PPI::Test::pragmas for test files instead of boilerplate
moregan Nov 13, 2014
8b06afb
t::lib::PPI::Test::pragmas for a release test
moregan Nov 14, 2014
2ae0eae
typo fixes
moregan Feb 17, 2014
b034573
test refactoring and coverage
moregan Feb 17, 2014
fb8f80e
unescape results from PPI::Token::QuoteLike::Words::literal
moregan Feb 17, 2014
a212df0
use xdigit character class -- fix #100
moregan Nov 20, 2014
a334732
Allow statements as well as structures and tokens to be inserted adja…
karenetheridge Dec 18, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
Revision history for Perl extension PPI
- Fix 1.218 regression where packages, subs, and words after
labels like /^x\d+/ would parse as x operator (GitHub #122)
(MOREGAN)

1.221_01
Summary:
- support Perl 5.12 "package NAMESPACE VERSION BLOCK" syntax

Details:
- support Perl 5.12 "package NAMESPACE VERSION BLOCK" syntax
(RT #67831, GitHub #70) (BDFOY, MOREGAN)
- Prevent package names like 'x' from being parsed as operators
(GitHub #75) (MOREGAN)
- Prevent package names like 'v10' from being parsed as version
strings (GitHub #76) (MOREGAN)
- Prevent sub names like 'v10' from being parsed as version
strings (RT #74527, GitHub #65) (JAE, MOREGAN)
- Prevent 'use' and 'no' package names from being parsed as
operators (MOREGAN)
- Prevent left side of fat comma from parsing as operator
operators (MOREGAN)

1.220 Tue 11 Nov 2014
Summary:
Expand Down
1 change: 1 addition & 0 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ test_requires 'Test::More' => '0.86';
test_requires 'Test::NoWarnings' => '0.084';
test_requires 'Test::Object' => '0.07';
test_requires 'Test::SubCalls' => '1.07';
test_requires 'Test::Deep';

# Force the existence of the weaken function
# (which some distributions annoyingly don't have)
Expand Down
2 changes: 1 addition & 1 deletion lib/PPI/Cache.pm
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ sub _md5hex {
my $it = _SCALAR($_[0])
? PPI::Util::md5hex(${$_[0]})
: $_[0];
return (defined $it and ! ref $it and $it =~ /^[a-f0-9]{32}\z/si)
return (defined $it and ! ref $it and $it =~ /^[[:xdigit:]]{32}\z/s)
? lc $it
: undef;
}
Expand Down
71 changes: 56 additions & 15 deletions lib/PPI/Lexer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,28 @@ sub _statement {

# Is it a token in our known classes list
my $class = $STATEMENT_CLASSES{$Token->content};
if ( $class ) {
# Is the next significant token a =>
# Read ahead to the next significant token
my $Next;
while ( $Next = $self->_get_token ) {
if ( !$Next->significant ) {
push @{$self->{delayed}}, $Next;
next;
}

last if
!$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';

# Got the next token
# Is an ordinary expression
$self->_rollback( $Next );
return 'PPI::Statement';
}

# Rollback and continue
$self->_rollback( $Next );
}

# Handle potential barewords for subscripts
if ( $Parent->isa('PPI::Structure::Subscript') ) {
Expand Down Expand Up @@ -533,8 +555,16 @@ sub _statement {
}

# Found the next significant token.
if (
$Next->isa('PPI::Token::Operator')
and
$Next->content eq '=>'
) {
# Is an ordinary expression
$self->_rollback( $Next );
return 'PPI::Statement';
# Is it a v6 use?
if ( $Next->content eq 'v6' ) {
} elsif ( $Next->content eq 'v6' ) {
$self->_rollback( $Next );
return 'PPI::Statement::Include::Perl6';
} else {
Expand Down Expand Up @@ -696,22 +726,23 @@ sub _continues {
return '';
}

# Alrighty then, there are only five implied end statement types,
# ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, and ::When
# statements.
unless ( ref($Statement) =~ /\b(?:Scheduled|Sub|Compound|Given|When)$/ ) {
return 1;
}

# Of these five, ::Scheduled, ::Sub, ::Given, and ::When follow the same
# simple rule and can be handled first.
# Alrighty then, there are six implied-end statement types:
# ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
# and ::Package statements.
return 1
if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;

# Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
# simple rule and can be handled first. The block form of ::Package
# follows the rule, too. (The non-block form of ::Package
# requires a statement terminator, and thus doesn't need to have
# an implied end detected.)
my @part = $Statement->schildren;
my $LastChild = $part[-1];
unless ( $Statement->isa('PPI::Statement::Compound') ) {
# If the last significant element of the statement is a block,
# then a scheduled statement is done, no questions asked.
return ! $LastChild->isa('PPI::Structure::Block');
}
# If the last significant element of the statement is a block,
# then an implied-end statement is done, no questions asked.
return !$LastChild->isa('PPI::Structure::Block')
if !$Statement->isa('PPI::Statement::Compound');

# Now we get to compound statements, which kind of suck (to lex).
# However, of them all, the 'if' type, which includes unless, are
Expand Down Expand Up @@ -1118,6 +1149,16 @@ sub _curly {
and return 'PPI::Structure::Subscript';
}
}

# Are we the last argument of sub?
# E.g.: 'sub foo {}', 'sub foo ($) {}'
return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');

# Are we the second or third argument of package?
# E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
return 'PPI::Structure::Block'
if $Parent->isa('PPI::Statement::Package');

if ( $CURLY_CLASSES{$content} ) {
# Known type
return $CURLY_CLASSES{$content};
Expand Down
3 changes: 3 additions & 0 deletions lib/PPI/Statement/Package.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ BEGIN {
@ISA = 'PPI::Statement';
}

# Lexer clues
sub __LEXER__normal() { '' }

=pod

=head2 namespace
Expand Down
8 changes: 4 additions & 4 deletions lib/PPI/Statement/Sub.pm
Original file line number Diff line number Diff line change
Expand Up @@ -165,17 +165,17 @@ Returns true if it is a special reserved subroutine, or false if not.
sub reserved {
my $self = shift;
my $name = $self->name or return '';
# perlsub is silent on whether reserveds can contain:
# - underscores;
# we allow them due to existing practice like CLONE_SKIP and __SUB__.
# - numbers; we allow them by PPI tradition.
$name eq uc $name;
}

1;

=pod

=head1 TO DO

- Write unit tests for this package

=head1 SUPPORT

See the L<support section|PPI/SUPPORT> in the main module.
Expand Down
12 changes: 6 additions & 6 deletions lib/PPI/Token.pm
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ sub content {
sub insert_before {
my $self = shift;
my $Element = _INSTANCE(shift, 'PPI::Element') or return undef;
if ( $Element->isa('PPI::Structure') ) {
return $self->__insert_before($Element);
} elsif ( $Element->isa('PPI::Token') ) {
if ( $Element->isa('PPI::Statement')
|| $Element->isa('PPI::Structure')
|| $Element->isa('PPI::Token') ) {
return $self->__insert_before($Element);
}
'';
Expand All @@ -184,9 +184,9 @@ sub insert_before {
sub insert_after {
my $self = shift;
my $Element = _INSTANCE(shift, 'PPI::Element') or return undef;
if ( $Element->isa('PPI::Structure') ) {
return $self->__insert_after($Element);
} elsif ( $Element->isa('PPI::Token') ) {
if ( $Element->isa('PPI::Statement')
|| $Element->isa('PPI::Structure')
|| $Element->isa('PPI::Token') ) {
return $self->__insert_after($Element);
}
'';
Expand Down
41 changes: 19 additions & 22 deletions lib/PPI/Token/HereDoc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,10 @@ sub __TOKENIZER__on_char {
return undef;
}

# Define $line outside of the loop, so that if we encounter the
# end of the file, we have access to the last line still.
my $line;

# Suck in the HEREDOC
$token->{_heredoc} = [];
$token->{_heredoc} = \my @heredoc;
my $terminator = $token->{_terminator} . "\n";
while ( defined($line = $t->_get_line) ) {
while ( defined( my $line = $t->_get_line ) ) {
if ( $line eq $terminator ) {
# Keep the actual termination line for consistency
# when we are re-assembling the file
Expand All @@ -219,29 +215,30 @@ sub __TOKENIZER__on_char {
}

# Add the line
push @{$token->{_heredoc}}, $line;
push @heredoc, $line;
}

# End of file.
# Error: Didn't reach end of here-doc before end of file.
# $line might be undef if we get NO lines.
if ( defined $line and $line eq $token->{_terminator} ) {
# If the last line matches the terminator
# but is missing the newline, we want to allow
# it anyway (like perl itself does). In this case
# perl would normally throw a warning, but we will
# also ignore that as well.
pop @{$token->{_heredoc}};
$token->{_terminator_line} = $line;
} else {
# The HereDoc was not properly terminated.
$token->{_terminator_line} = undef;

# Trim off the trailing whitespace
if ( defined $token->{_heredoc}->[-1] and $t->{source_eof_chop} ) {
chop $token->{_heredoc}->[-1];
# If the here-doc block is not empty, look at the last line to determine if
# the here-doc terminator is missing a newline (which Perl would fail to
# compile but is easy to detect) or if the here-doc block was just not
# terminated at all (which Perl would fail to compile as well).
$token->{_terminator_line} = undef;
if ( @heredoc and defined $heredoc[-1] ) {
# See PPI::Tokenizer, the algorithm there adds a space at the end of the
# document that we need to make sure we remove.
if ( $t->{source_eof_chop} ) {
chop $heredoc[-1];
$t->{source_eof_chop} = '';
}

# Check if the last line of the file matches the terminator without
# newline at the end. If so, remove it from the content and set it as
# the terminator line.
$token->{_terminator_line} = pop @heredoc
if $heredoc[-1] eq $token->{_terminator};
}

# Set a hint for PPI::Document->serialize so it can
Expand Down
2 changes: 1 addition & 1 deletion lib/PPI/Token/Number/Hex.pm
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ sub __TOKENIZER__on_char {
# Allow underscores straight through
return 1 if $char eq '_';

if ( $char =~ /[\da-f]/i ) {
if ( $char =~ /[[:xdigit:]]/ ) {
return 1;
}

Expand Down
16 changes: 11 additions & 5 deletions lib/PPI/Token/Number/Version.pm
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,23 @@ sub __TOKENIZER__on_char {
sub __TOKENIZER__commit {
my $t = $_[1];

# Get the rest of the line
# Capture the rest of the token
pos $t->{line} = $t->{line_cursor};
if ( $t->{line} !~ m/\G(v\d+(?:\.\d+)*)/gc ) {
if ( $t->{line} !~ m/\G(v\d+(?:\.\d+)+|v\d+\b)/gc ) {
# This was not a v-string after all (it's a word)
return PPI::Token::Word->__TOKENIZER__commit($t);
}

my $content = $1;

# If there are no periods this could be a word starting with v\d
# Forced to be a word. Done.
return PPI::Token::Word->__TOKENIZER__commit($t)
if $content !~ /\./ and $t->__current_token_is_forced_word;

# This is a v-string
my $vstring = $1;
$t->{line_cursor} += length($vstring);
$t->_new_token('Number::Version', $vstring);
$t->{line_cursor} += length $content;
$t->_new_token( 'Number::Version', $content );
$t->_finalize_token->__TOKENIZER__on_char($t);
}

Expand Down
23 changes: 15 additions & 8 deletions lib/PPI/Token/QuoteLike/Words.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,27 @@ BEGIN {

=head2 literal

Returns the words contained. Note that this method does not check the
Returns the words contained as a list. Note that this method does not check the
context that the token is in; it always returns the list and not merely
the last element if the token is in scalar context.

=cut

sub literal {
my $self = shift;
my $section = $self->{sections}->[0];
return split ' ', substr(
$self->{content},
$section->{position},
$section->{size},
);
my ( $self ) = @_;

my $content = $self->_section_content(0);
return if !defined $content;

# Undo backslash escaping of '\', the left delimiter,
# and the right delimiter. The right delimiter will
# only exist with paired delimiters: qw() qw[] qw<> qw{}.
my ( $left, $right ) = ( $self->_delimiters, '', '' );
$content =~ s/\\([\Q$left$right\\\E])/$1/g;

my @words = split ' ', $content;

return @words;
}

1;
Expand Down
2 changes: 1 addition & 1 deletion lib/PPI/Token/Unknown.pm
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ sub __TOKENIZER__on_char {
} elsif (
$p0->isa('PPI::Token::Structure')
and
$p0->content =~ /^(?:\)|\])$/
$p0->content =~ /^(?:\)|\]|\})$/
) {
$_class = 'Operator';
} else {
Expand Down
9 changes: 5 additions & 4 deletions lib/PPI/Token/Whitespace.pm
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ sub __TOKENIZER__on_line_start {

sub __TOKENIZER__on_char {
my $t = $_[1];
my $char = ord substr $t->{line}, $t->{line_cursor}, 1;
my $c = substr $t->{line}, $t->{line_cursor}, 1;
my $char = ord $c;

# Do we definitely know what something is?
return $COMMITMAP[$char]->__TOKENIZER__commit($t) if $COMMITMAP[$char];
Expand Down Expand Up @@ -407,9 +408,9 @@ sub __TOKENIZER__on_char {
}

} elsif ( $char >= 128 ) { # Outside ASCII
return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $t =~ /\w/;
return 'Whitespace' if $t =~ /\s/;
}
return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $c =~ /\w/;
return 'Whitespace' if $c =~ /\s/;
}


# All the whitespaces are covered, so what to do
Expand Down
Loading