@@ -713,7 +713,7 @@ static modstruct modlist[] = {
713713 { "global" , MOD_PNDP , MOD_CTL , CTL_GLOBAL , PO (control ) },
714714 { "heap_limit" , MOD_CTM , MOD_INT , 0 , MO (heap_limit ) },
715715 { "heapframes_size" , MOD_PND , MOD_CTL , CTL2_HEAPFRAMES_SIZE , PO (control2 ) },
716- { "hex" , MOD_PAT , MOD_CTL , CTL_HEXPAT , PO (control ) },
716+ { "hex" , MOD_PATP , MOD_CTL , CTL_HEXPAT , PO (control ) },
717717 { "info" , MOD_PAT , MOD_CTL , CTL_INFO , PO (control ) },
718718 { "jit" , MOD_PAT , MOD_IND , 7 , PO (jit ) },
719719 { "jitfast" , MOD_PAT , MOD_CTL , CTL_JITFAST , PO (control ) },
@@ -7264,7 +7264,7 @@ while ((c = *p++) != 0)
72647264 c = 0 ;
72657265 for (pt ++ ; isdigit (* pt ) && * pt < '8' ; ++ i , pt ++ )
72667266 {
7267- if (c >= 0x20000000l )
7267+ if (c >= 0x20000000u )
72687268 {
72697269 fprintf (outfile , "** \\o{ escape too large\n" );
72707270 return PR_OK ;
@@ -7397,20 +7397,38 @@ while ((c = *p++) != 0)
73977397 "and therefore cannot be encoded as UTF-8\n" , c );
73987398 return PR_OK ;
73997399 }
7400+ else if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT )
7401+ fprintf (outfile , "** Warning: character \\N{U+%x} is greater than "
7402+ "0x%x and should not be encoded as UTF-8\n" ,
7403+ c , MAX_UTF_CODE_POINT );
74007404 q8 += ord2utf8 (c , q8 );
74017405 }
74027406 }
74037407#endif
74047408#ifdef SUPPORT_PCRE2_16
74057409 if (test_mode == PCRE16_MODE )
74067410 {
7407- if (encoding == FORCE_UTF || utf )
7411+ /* Unlike the 8-bit code, there are no forced raw suggestions for the
7412+ 16-bit mode, so assume raw unless utf is preferred */
7413+
7414+ if (!(encoding == FORCE_UTF || utf ))
74087415 {
7409- if (c > 0x10ffffu )
7416+ if (c > 0xffffu )
7417+ {
7418+ fprintf (outfile , "** Character \\x{%x} is greater than 0xffff "
7419+ "and UTF-16 mode is not enabled.\n" , c );
7420+ fprintf (outfile , "** Truncation will probably give the wrong "
7421+ "result.\n" );
7422+ }
7423+ * q16 ++ = (uint16_t )c ;
7424+ }
7425+ else
7426+ {
7427+ if (c > MAX_UTF_CODE_POINT )
74107428 {
74117429 fprintf (outfile , "** Failed: character \\N{U+%x} is greater than "
7412- "0x10ffff and therefore cannot be encoded as "
7413- "UTF-16\n" , c );
7430+ "0x%x and therefore cannot be encoded as UTF-16\n" ,
7431+ c , MAX_UTF_CODE_POINT );
74147432 return PR_OK ;
74157433 }
74167434 else if (c >= 0x10000u )
@@ -7419,24 +7437,25 @@ while ((c = *p++) != 0)
74197437 * q16 ++ = 0xD800 | (c >> 10 );
74207438 * q16 ++ = 0xDC00 | (c & 0x3ff );
74217439 }
7422- else * q16 ++ = c ;
7423- }
7424- else
7425- {
7426- if (c > 0xffffu )
7440+ else
74277441 {
7428- fprintf ( outfile , "** Character \\x{%x} is greater than 0xffff "
7429- "and UTF-16 mode is not enabled.\n" , c );
7430- fprintf ( outfile , "** Truncation will probably give the wrong "
7431- "result.\n" ) ;
7442+ if ( encoding == FORCE_UTF && 0xe000u > c && c >= 0xd800u )
7443+ fprintf ( outfile , "** Warning: character \\N{U+%x} is a surrogate "
7444+ "and should not be encoded as UTF-16\n" , c );
7445+ * q16 ++ = c ;
74327446 }
7433-
7434- * q16 ++ = (uint16_t )c ;
74357447 }
74367448 }
74377449#endif
74387450#ifdef SUPPORT_PCRE2_32
7439- if (test_mode == PCRE32_MODE ) * q32 ++ = c ;
7451+ if (test_mode == PCRE32_MODE )
7452+ {
7453+ if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT )
7454+ fprintf (outfile , "** Warning: character \\N{U+%x} is greater than "
7455+ "0x%x and should not be encoded as UTF-32\n" ,
7456+ c , MAX_UTF_CODE_POINT );
7457+ * q32 ++ = c ;
7458+ }
74407459#endif
74417460 }
74427461
0 commit comments