Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 68 additions & 31 deletions perltest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,19 @@
# a script to Perl through a pipe. See comments below about the data for the
# Perl script. If the next argument of this script is "-utf8", a suitable
# prefix for the Perl script is set up.

# If the next argument of this script is -locale, it must be followed by the
# name of a locale, which is then set when running the tests. Setting a locale
# implies -utf8. For example:
#
# ./perltest.sh -locale tr_TR.utf8 some-file
# A similar process is used to indicate the desire to set a specific locale
# tables per pattern in a similar way to pcre2test through a locale modifier,
# by using the -locale argument. This can be optionally combined with the
# previous arguments; for example, to process an UTF-8 test file in Turkish,
# add the locale=tr_TR.utf8 modifier to the pattern and -locale to perltest,
# or invoke something like (the specific names of the locale might vary):
#
# ./perltest.sh -utf8 -locale=tr_TR.utf8 some-file
#
# If the -locale argument has no setting, a suitable default locale is used
# when possible and reported at startup, it can be always overriden using the
# locale modifier for each pattern.
#
# The remaining arguments of this script, if any, are passed to Perl. They are
# an input file and an output file. If there is one argument, the output is
Expand All @@ -33,7 +40,7 @@

perl=perl
perlarg=""
prefix=''
prefix=""
spc=""

if [ $# -gt 0 -a "$1" = "-perl" ] ; then
Expand All @@ -53,27 +60,37 @@ if [ $# -gt 0 -a "$1" = "-w" ] ; then
fi

if [ $# -gt 0 -a "$1" = "-utf8" ] ; then
prefix="use utf8; require Encode;"
default_locale="C.utf8"
prefix="\
use utf8;\
require Encode;"
perlarg="$perlarg$spc-CSD"

shift
fi

if [ $# -gt 0 -a "$1" = "-locale" ] ; then
if [ $# -lt 2 ] ; then
echo "perltest.sh: Missing locale name - abandoned"
exit 1
if [ $# -gt 0 ] ; then
case "$1" in
-locale=*)
default_locale=${1#-locale=}
;;
-locale)
default_locale=${default_locale:-C}
;;
*)
skip=1
esac
if [ -z "$skip" ] ; then
prefix="\
use POSIX qw(locale_h);\
use locale qw(:ctype);\
\
\$default_locale = setlocale(LC_CTYPE, \"$default_locale\");\
if (!defined(\$default_locale))\
{ die \"perltest: Failed to set locale \\\"$default_locale\\\"\\\n\"; }\
print \"Locale: \$default_locale\\\n\";\
$prefix"
shift
fi
prefix="use utf8;\
use POSIX qw(locale_h);\
use locale;\
\$loc=setlocale(LC_ALL, \"$2\");\
if (\"\$loc\" eq \"\")\
{ die \"perltest.sh: Failed to set locale \\\"$2\\\" - abandoned\\n\";}\
print \"Locale: \$loc\\n\";\
require Encode;"
shift
shift
fi


Expand All @@ -87,6 +104,7 @@ fi
# dupnames ignored (Perl always allows)
# hex preprocess pattern with embedded octets
# jitstack ignored
# locale use a specific locale tables
# mark show mark information
# no_auto_possess ignored
# no_start_optimize insert (??{""}) at pattern start (disables optimizing)
Expand Down Expand Up @@ -146,7 +164,7 @@ else
{
foreach $c (split(//, $_[0]))
{
if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
if ($c =~ /^[[:print:]]$/) { $t .= $c; }
else { $t .= sprintf("\\x%02x", ord $c); }
}
}
Expand Down Expand Up @@ -190,6 +208,12 @@ $default_show_mark = 0;
NEXT_RE:
for (;;)
{
if (defined $locale && defined $default_locale)
{
setlocale(LC_CTYPE, $default_locale);
undef $locale;
}

printf " re> " if $interact;
last if ! ($_ = <$infile>);
printf $outfile "$_" if ! $interact;
Expand Down Expand Up @@ -263,10 +287,6 @@ for (;;)

$mod =~ s/allaftertext,?//;

# Detect utf

$utf8 = $mod =~ s/utf,?//;

# Remove "dupnames".

$mod =~ s/dupnames,?//;
Expand All @@ -275,6 +295,19 @@ for (;;)

$mod =~ s/jitstack=\d+,?//;

# The "locale" modifier indicates which locale to use
if ($mod =~ /locale=([^,]+),?/)
{
die "perltest: missing -locale cmdline flag" unless defined &setlocale;
$locale = setlocale(LC_CTYPE, $1);
if (!defined $locale)
{
print "** Failed to set locale '$1'\n";
next NEXT_RE;
}
}
$mod =~ s/locale=[^,]*,?//; # Remove it; "locale=" Ignored

# The "mark" modifier requests checking of MARK data */

$show_mark = $default_show_mark | ($mod =~ s/mark,?//);
Expand All @@ -283,11 +316,16 @@ for (;;)

$mod =~ s/ucp,?/u/;

# Detect utf

$utf8 = $mod =~ s/utf,?//;

# Remove "no_auto_possess".

$mod =~ s/no_auto_possess,?//;

# The "hex" modifier instructs us to preprocess the pattern
# The "hex" modifier instructs us to preprocess a pattern with embedded
# octets formatted as two digit hexadecimals

if ($mod =~ s/hex,?//)
{
Expand Down Expand Up @@ -321,12 +359,11 @@ for (;;)

$mod =~ s/-no_start_optimize,?//;

if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; }
if ($mod =~ s/no_start_optimize,?//) { $pat = '(??{""})' . $pat; }

# Add back retained modifiers and check that the pattern is valid.

$mod =~ s/,//g;

$pattern = "$del$pat$del$mod";

eval "\$_ =~ ${pattern}";
Expand Down Expand Up @@ -419,7 +456,7 @@ for (;;)

if ($@)
{
printf $outfile "Error: $@\n";
printf $outfile "Error: $@";
next NEXT_RE;
}
elsif (scalar(@subs) == 0)
Expand Down
2 changes: 1 addition & 1 deletion src/pcre2test.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ static modstruct modlist[] = {
{ "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
{ "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
{ "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
{ "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
{ "locale", MOD_PATP, MOD_STR, LOCALESIZE, PO(locale) },
{ "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
{ "match_invalid_utf", MOD_PAT, MOD_OPT, PCRE2_MATCH_INVALID_UTF, PO(options) },
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
Expand Down
9 changes: 9 additions & 0 deletions testdata/testinput1
Original file line number Diff line number Diff line change
Expand Up @@ -5087,6 +5087,15 @@ name)/mark
\= Expect no match
D

/(*COMMIT)ABC/no_start_optimize
ABC
\= Expect no match
DEFABC

/(*COMMIT)ABC/
ABC
DEFABC

# This should fail, as the skip causes a bump to offset 3 (the skip).

/A(*MARK:A)A+(*SKIP)(B|Z) | AC/x,mark
Expand Down
14 changes: 9 additions & 5 deletions testdata/testinput3
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This set of tests checks local-specific features, using the "fr_FR" locale.
# It is not Perl-compatible. When run via RunTest, the locale is edited to
# It is almost Perl-compatible. When run via RunTest, the locale is edited to
# be whichever of "fr_FR", "french", or "fr" is found to exist. There is
# different version of this file called wintestinput3 for use on Windows,
# where the locale is called "french" and the tests are run using
Expand All @@ -14,10 +14,6 @@
/^[\w]+/locale=fr_FR
�cole

/^[\w]+/
\= Expect no match
�cole

/^[\W]+/
�cole

Expand Down Expand Up @@ -80,6 +76,14 @@
\= Expect no match
\x9c

/�/i
\xff
\= Expect no match
y

/(.)\1/i
\xfe\xde

/\W+/
>>>\xaa<<<
>>>\xba<<<
Expand Down
13 changes: 13 additions & 0 deletions testdata/testoutput1
Original file line number Diff line number Diff line change
Expand Up @@ -8174,6 +8174,19 @@ MK: B
D
No match, mark = B

/(*COMMIT)ABC/no_start_optimize
ABC
0: ABC
\= Expect no match
DEFABC
No match

/(*COMMIT)ABC/
ABC
0: ABC
DEFABC
0: ABC

# This should fail, as the skip causes a bump to offset 3 (the skip).

/A(*MARK:A)A+(*SKIP)(B|Z) | AC/x,mark
Expand Down
19 changes: 13 additions & 6 deletions testdata/testoutput3
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This set of tests checks local-specific features, using the "fr_FR" locale.
# It is not Perl-compatible. When run via RunTest, the locale is edited to
# It is almost Perl-compatible. When run via RunTest, the locale is edited to
# be whichever of "fr_FR", "french", or "fr" is found to exist. There is
# different version of this file called wintestinput3 for use on Windows,
# where the locale is called "french" and the tests are run using
Expand All @@ -16,11 +16,6 @@ No match
�cole
0: �cole

/^[\w]+/
\= Expect no match
�cole
No match

/^[\W]+/
�cole
0: \xc9
Expand Down Expand Up @@ -115,6 +110,18 @@ No match
\x9c
No match

/�/i
\xff
0: �
\= Expect no match
y
No match

/(.)\1/i
\xfe\xde
0: ��
1: �

/\W+/
>>>\xaa<<<
0: >>>
Expand Down
19 changes: 13 additions & 6 deletions testdata/testoutput3A
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This set of tests checks local-specific features, using the "fr_FR" locale.
# It is not Perl-compatible. When run via RunTest, the locale is edited to
# It is almost Perl-compatible. When run via RunTest, the locale is edited to
# be whichever of "fr_FR", "french", or "fr" is found to exist. There is
# different version of this file called wintestinput3 for use on Windows,
# where the locale is called "french" and the tests are run using
Expand All @@ -16,11 +16,6 @@ No match
�cole
0: �cole

/^[\w]+/
\= Expect no match
�cole
No match

/^[\W]+/
�cole
0: \xc9
Expand Down Expand Up @@ -115,6 +110,18 @@ No match
\x9c
No match

/�/i
\xff
0: �
\= Expect no match
y
No match

/(.)\1/i
\xfe\xde
0: ��
1: �

/\W+/
>>>\xaa<<<
0: >>>
Expand Down
19 changes: 13 additions & 6 deletions testdata/testoutput3B
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This set of tests checks local-specific features, using the "fr_FR" locale.
# It is not Perl-compatible. When run via RunTest, the locale is edited to
# It is almost Perl-compatible. When run via RunTest, the locale is edited to
# be whichever of "fr_FR", "french", or "fr" is found to exist. There is
# different version of this file called wintestinput3 for use on Windows,
# where the locale is called "french" and the tests are run using
Expand All @@ -16,11 +16,6 @@ No match
�cole
0: �cole

/^[\w]+/
\= Expect no match
�cole
No match

/^[\W]+/
�cole
0: \xc9
Expand Down Expand Up @@ -115,6 +110,18 @@ No match
\x9c
No match

/�/i
\xff
0: �
\= Expect no match
y
No match

/(.)\1/i
\xfe\xde
0: ��
1: �

/\W+/
>>>\xaa<<<
0: >>>
Expand Down
Loading