1515# a script to Perl through a pipe. See comments below about the data for the
1616# Perl script. If the next argument of this script is "-utf8", a suitable
1717# prefix for the Perl script is set up.
18-
19- # If the next argument of this script is -locale, it must be followed by the
20- # name of a locale, which is then set when running the tests. Setting a locale
21- # implies -utf8. For example:
2218#
23- # ./perltest.sh -locale tr_TR.utf8 some-file
19+ # A similar process is used to indicate the desire to set a specific locale
20+ # tables per pattern in a similar way to pcre2test through a locale modifier,
21+ # by using the -locale argument. This can be optionally combined with the
22+ # previous arguments; for example, to process an UTF-8 test file in Turkish,
23+ # add the locale=tr_TR.utf8 modifier to the pattern and -locale to perltest,
24+ # or invoke something like (the specific names of the locale might vary):
25+ #
26+ # ./perltest.sh -utf8 -locale=tr_TR.utf8 some-file
27+ #
28+ # If the -locale argument has no setting, a suitable default locale is used
29+ # when possible and reported at startup, it can be always overriden using the
30+ # locale modifier for each pattern.
2431#
2532# The remaining arguments of this script, if any, are passed to Perl. They are
2633# an input file and an output file. If there is one argument, the output is
3340
3441perl=perl
3542perlarg=" "
36- prefix=' '
43+ prefix=" "
3744spc=" "
3845
3946if [ $# -gt 0 -a " $1 " = " -perl" ] ; then
@@ -53,27 +60,37 @@ if [ $# -gt 0 -a "$1" = "-w" ] ; then
5360fi
5461
5562if [ $# -gt 0 -a " $1 " = " -utf8" ] ; then
56- prefix=" use utf8; require Encode;"
63+ default_locale=" C.utf8"
64+ prefix=" \
65+ use utf8;\
66+ require Encode;"
5767 perlarg=" $perlarg$spc -CSD"
58-
5968 shift
6069fi
6170
62- if [ $# -gt 0 -a " $1 " = " -locale" ] ; then
63- if [ $# -lt 2 ] ; then
64- echo " perltest.sh: Missing locale name - abandoned"
65- exit 1
71+ if [ $# -gt 0 ] ; then
72+ case " $1 " in
73+ -locale=* )
74+ default_locale=${1# -locale=}
75+ ;;
76+ -locale)
77+ default_locale=${default_locale:- C}
78+ ;;
79+ * )
80+ skip=1
81+ esac
82+ if [ -z " $skip " ] ; then
83+ prefix=" \
84+ use POSIX qw(locale_h);\
85+ use locale qw(:ctype);\
86+ \
87+ \$ default_locale = setlocale(LC_CTYPE, \" $default_locale \" );\
88+ if (!defined(\$ default_locale))\
89+ { die \" perltest: Failed to set locale \\\" $default_locale \\\"\\ \n\" ; }\
90+ print \" Locale: \$ default_locale\\ \n\" ;\
91+ $prefix "
92+ shift
6693 fi
67- prefix=" use utf8;\
68- use POSIX qw(locale_h);\
69- use locale;\
70- \$ loc=setlocale(LC_ALL, \" $2 \" );\
71- if (\"\$ loc\" eq \"\" )\
72- { die \" perltest.sh: Failed to set locale \\\" $2 \\\" - abandoned\\ n\" ;}\
73- print \" Locale: \$ loc\\ n\" ;\
74- require Encode;"
75- shift
76- shift
7794fi
7895
7996
87104# dupnames ignored (Perl always allows)
88105# hex preprocess pattern with embedded octets
89106# jitstack ignored
107+ # locale use a specific locale tables
90108# mark show mark information
91109# no_auto_possess ignored
92110# no_start_optimize insert (??{""}) at pattern start (disables optimizing)
146164 {
147165 foreach $c (split(//, $_[0]))
148166 {
149- if (ord $c >= 32 && ord $c < 127 ) { $t .= $c; }
167+ if ($c =~ /^[[:print:]]$/ ) { $t .= $c; }
150168 else { $t .= sprintf("\\x%02x", ord $c); }
151169 }
152170 }
@@ -190,6 +208,12 @@ $default_show_mark = 0;
190208NEXT_RE:
191209for (;;)
192210 {
211+ if (defined $locale && defined $default_locale)
212+ {
213+ setlocale(LC_CTYPE, $default_locale);
214+ undef $locale;
215+ }
216+
193217 printf " re> " if $interact;
194218 last if ! ($_ = <$infile>);
195219 printf $outfile "$_" if ! $interact;
@@ -263,10 +287,6 @@ for (;;)
263287
264288 $mod =~ s/allaftertext,?//;
265289
266- # Detect utf
267-
268- $utf8 = $mod =~ s/utf,?//;
269-
270290 # Remove "dupnames".
271291
272292 $mod =~ s/dupnames,?//;
@@ -275,6 +295,19 @@ for (;;)
275295
276296 $mod =~ s/jitstack=\d+,?//;
277297
298+ # The "locale" modifier indicates which locale to use
299+ if ($mod =~ /locale=([^,]+),?/)
300+ {
301+ die "perltest: missing -locale cmdline flag" unless defined &setlocale;
302+ $locale = setlocale(LC_CTYPE, $1);
303+ if (!defined $locale)
304+ {
305+ print "** Failed to set locale '$1'\n";
306+ next NEXT_RE;
307+ }
308+ }
309+ $mod =~ s/locale=[^,]*,?//; # Remove it; "locale=" Ignored
310+
278311 # The "mark" modifier requests checking of MARK data */
279312
280313 $show_mark = $default_show_mark | ($mod =~ s/mark,?//);
@@ -283,11 +316,16 @@ for (;;)
283316
284317 $mod =~ s/ucp,?/u/;
285318
319+ # Detect utf
320+
321+ $utf8 = $mod =~ s/utf,?//;
322+
286323 # Remove "no_auto_possess".
287324
288325 $mod =~ s/no_auto_possess,?//;
289326
290- # The "hex" modifier instructs us to preprocess the pattern
327+ # The "hex" modifier instructs us to preprocess a pattern with embedded
328+ # octets formatted as two digit hexadecimals
291329
292330 if ($mod =~ s/hex,?//)
293331 {
@@ -321,12 +359,11 @@ for (;;)
321359
322360 $mod =~ s/-no_start_optimize,?//;
323361
324- if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del (??{""})/ ; }
362+ if ($mod =~ s/no_start_optimize,?//) { $pat = ' (??{""})' . $pat ; }
325363
326364 # Add back retained modifiers and check that the pattern is valid.
327365
328366 $mod =~ s/,//g;
329-
330367 $pattern = "$del$pat$del$mod";
331368
332369 eval "\$_ =~ ${pattern}";
@@ -419,7 +456,7 @@ for (;;)
419456
420457 if ($@)
421458 {
422- printf $outfile "Error: $@\n ";
459+ printf $outfile "Error: $@";
423460 next NEXT_RE;
424461 }
425462 elsif (scalar(@subs) == 0)
0 commit comments