Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/pcre2_auto_possess.c
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,8 @@ for(;;)
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
list_ptr[2] + LINK_SIZE, utf)) return FALSE;
list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
return FALSE;
break;
#endif

Expand All @@ -1124,7 +1125,9 @@ for(;;)
case OP_ECLASS:
if (PRIV(eclass)(chr,
(list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
(list_ptr == list ? code : base_end) - list_ptr[3], utf)) return FALSE;
(list_ptr == list ? code : base_end) - list_ptr[3],
(const uint8_t*)cb->start_code, utf))
return FALSE;
break;

default:
Expand Down
34 changes: 29 additions & 5 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -5919,7 +5919,6 @@ for (;; pptr++)
zerofirstcuflags = firstcuflags;
zeroreqcu = reqcu;
zeroreqcuflags = reqcuflags;

break; /* End of class processing */


Expand Down Expand Up @@ -9810,6 +9809,7 @@ cb.workspace_size = COMPILE_WORK_SIZE;
#ifdef SUPPORT_WIDE_CHARS
cb.cranges = NULL;
cb.next_cranges = NULL;
cb.char_lists_size = 0;
#endif

/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
Expand Down Expand Up @@ -10200,7 +10200,13 @@ if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */

/* This should be caught in compile_regex(), but just in case... */

#if defined SUPPORT_WIDE_CHARS
PCRE2_ASSERT((cb.char_lists_size & 0x3) == 0);
if (length > MAX_PATTERN_SIZE ||
MAX_PATTERN_SIZE - length < (cb.char_lists_size / sizeof(PCRE2_UCHAR)))
#else
if (length > MAX_PATTERN_SIZE)
#endif
{
errorcode = ERR20;
goto HAD_CB_ERROR;
Expand All @@ -10211,8 +10217,22 @@ block for storing the compiled pattern and names table. Integer overflow should
no longer be possible because nowadays we limit the maximum value of
cb.names_found and cb.name_entry_size. */

re_blocksize = CU2BYTES(length +
(PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);
re_blocksize =
CU2BYTES((PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);

#if defined SUPPORT_WIDE_CHARS
if (cb.char_lists_size != 0)
{
#if PCRE2_CODE_UNIT_WIDTH != 32
/* Align to 32 bit first. This ensures the
allocated area will also be 32 bit aligned. */
re_blocksize = (PCRE2_SIZE)CLIST_ALIGN_TO(re_blocksize, sizeof(uint32_t));
#endif
re_blocksize += cb.char_lists_size;
}
#endif

re_blocksize += CU2BYTES(length);

if (re_blocksize > ccontext->max_pattern_compiled_length)
{
Expand Down Expand Up @@ -10241,6 +10261,7 @@ re->tables = tables;
re->executable_jit = NULL;
memset(re->start_bitmap, 0, 32 * sizeof(uint8_t));
re->blocksize = re_blocksize;
re->code_start = re_blocksize - CU2BYTES(length);
re->magic_number = MAGIC_NUMBER;
re->compile_options = options;
re->overall_options = cb.external_options;
Expand All @@ -10264,8 +10285,7 @@ re->optimization_flags = optim_flags;
/* The basic block is immediately followed by the name table, and the compiled
code follows after that. */

codestart = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count;
codestart = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start);

/* Update the compile data block for the actual compile. The starting points of
the name/number translation table and of the code are passed around in the
Expand All @@ -10280,6 +10300,10 @@ cb.start_code = codestart;
cb.req_varyopt = 0;
cb.had_accept = FALSE;
cb.had_pruneorskip = FALSE;
#ifdef SUPPORT_WIDE_CHARS
cb.char_lists_size = 0;
#endif


/* If any named groups were found, create the name/number table from the list
created in the pre-pass. */
Expand Down
3 changes: 3 additions & 0 deletions src/pcre2_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ therefore no need for it to have a length entry, so use a high value. */
#define SELECT_VALUE8(value8, value) (value)
#endif

/* Macro for aligning data: rounds base up to the next multiple of align.
The align argument must be a power of two, because the rounding is done by
masking off the low bits. Both arguments are fully parenthesized so that
expression arguments (e.g. "a | b" or "c ? x : y") are evaluated as a unit.
Note that align is expanded twice, so it must not have side effects. */
#define CLIST_ALIGN_TO(base, align) \
  (((base) + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))

/* Macros for the definitions below, to prevent name collisions. */

Expand Down
101 changes: 45 additions & 56 deletions src/pcre2_compile_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -1703,94 +1703,83 @@ if ((xclass_props & XCLASS_REQUIRED) != 0)

if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
{
/* Char lists size is an even number,
because all items are 16 or 32 bit values. */
/* Char lists size is an even number, because all items are 16 or 32
bit values. The character list data is always aligned to 32 bits. */
size_t char_lists_size = cranges->char_lists_size;
PCRE2_ASSERT((char_lists_size & 0x1) == 0);
PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
(cb->char_lists_size & 0x3) == 0);

if (lengthptr != NULL)
{
/* At this point, we don't know the precise location
so the maximum alignment is added to the length. */
char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

#if PCRE2_CODE_UNIT_WIDTH == 8
*lengthptr += 2 /* sizeof(type) in PCRE2_UCHARs */ +
3 /* maximum alignment. */;
#elif PCRE2_CODE_UNIT_WIDTH == 16
*lengthptr += 1 /* sizeof(type) in PCRE2_UCHARs */ +
1 /* maximum alignment. */;
char_lists_size >>= 1;
*lengthptr += 2 + LINK_SIZE;
#else
*lengthptr += 1 /* sizeof(type) in PCRE2_UCHARs */;
/* Padding, when the size is not divisible by 4. */
if ((char_lists_size & 0x2) != 0)
char_lists_size += 2;
char_lists_size >>= 2;
*lengthptr += 1 + LINK_SIZE;
#endif

if (INT_MAX - *lengthptr < char_lists_size)
{
*errorcodeptr = ERR20; /* Integer overflow */
return FALSE;
}
cb->char_lists_size += char_lists_size;

*lengthptr += char_lists_size;
char_lists_size /= sizeof(PCRE2_UCHAR);

if (*lengthptr > MAX_PATTERN_SIZE)
/* Storage space for character lists is included
in the maximum pattern size. */
if (*lengthptr > MAX_PATTERN_SIZE ||
MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
{
*errorcodeptr = ERR20; /* Pattern is too large */
return FALSE;
return 0;
}
}
else
{
uint8_t *char_buffer = (uint8_t*)code;
uint8_t *data;

PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
#if PCRE2_CODE_UNIT_WIDTH == 8
/* Encode as high / low bytes. */
code[0] = (uint8_t)(XCL_LIST |
(cranges->char_lists_types >> 8));
code[1] = (uint8_t)cranges->char_lists_types;
char_buffer += 2;
code += 2;
#else
*code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
#endif

/* Compute alignment. */
if (((uintptr_t)char_buffer & 0x1) != 0)
{
code[0] |= 1u << (XCL_ALIGNMENT_SHIFT - 8);
char_buffer += 1;
}
/* Character lists are stored in backwards direction from
byte code start. The non-dfa/dfa matchers can access these
lists using the byte code start stored in match blocks.
Each list is aligned to 32 bit with an optional unused
16 bit value at the beginning of the character list. */

if (((uintptr_t)char_buffer & 0x2) != (char_lists_size & 0x2))
{
code[0] |= 2u << (XCL_ALIGNMENT_SHIFT - 8);
char_buffer += 2;
}
#elif PCRE2_CODE_UNIT_WIDTH == 16
code[0] = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
char_buffer += 2;
cb->char_lists_size += char_lists_size;
data = (uint8_t*)cb->start_code - cb->char_lists_size;

/* Compute alignment. */
if (((uintptr_t)char_buffer & 0x2) != (char_lists_size & 0x2))
{
code[0] |= 2u << XCL_ALIGNMENT_SHIFT;
char_buffer += 2;
}
#else
code[0] = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
char_buffer += 4;
memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
char_lists_size);

/* Since the total size of the character lists is less than MAX_PATTERN_SIZE,
their starting offset fits into a value whose size is LINK_SIZE. */

char_lists_size = cb->char_lists_size;
PUT(code, 0, (uint32_t)(char_lists_size >> 1));
code += LINK_SIZE;

/* Padding. */
#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
if ((char_lists_size & 0x2) != 0)
{
code[0] |= 2u << XCL_ALIGNMENT_SHIFT;
char_buffer += 2;
/* In debug the unused 16 bit value is set
to a fixed value and marked unused. */
((uint16_t*)data)[-1] = 0x5555;
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
#endif
}
#endif
memcpy(char_buffer,
(uint8_t*)(cranges + 1) + cranges->char_lists_start,
char_lists_size);

code = (PCRE2_UCHAR*)(char_buffer + char_lists_size);
cb->char_lists_size =
CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));

cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
}
Expand Down
11 changes: 7 additions & 4 deletions src/pcre2_dfa_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -2682,7 +2682,9 @@ for (;;)
if (codevalue == OP_XCLASS)
{
ecode = code + GET(code, 1);
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
if (clen > 0)
isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
(const uint8_t*)mb->start_code, utf);
}

/* A nested set-based class has internal opcodes for performing
Expand All @@ -2691,7 +2693,9 @@ for (;;)
else if (codevalue == OP_ECLASS)
{
ecode = code + GET(code, 1);
if (clen > 0) isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode, utf);
if (clen > 0)
isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
(const uint8_t*)mb->start_code, utf);
}

/* For a simple class, there is always just a 32-byte table, and we
Expand Down Expand Up @@ -3536,8 +3540,7 @@ if (mb->match_limit_depth > re->limit_depth)
if (mb->heap_limit > re->limit_heap)
mb->heap_limit = re->limit_heap;

mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
mb->tables = re->tables;
mb->start_subject = subject;
mb->end_subject = end_subject;
Expand Down
15 changes: 5 additions & 10 deletions src/pcre2_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1358,9 +1358,8 @@ contain characters with values greater than 255. */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* This value represents the beginning of character lists. The value
is 16 bit long, and stored as a high and low byte pair in 8 bit mode.
The lower 12 bit contains information about character lists (see later)
and next two bits contains the alignment (padding) data. */
#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x40 : 0x4000)
The lower 12 bits contain information about character lists (see later). */
#define XCL_LIST (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000)

/* When a character class contains many characters/ranges,
they are stored in character lists. There are four character
Expand Down Expand Up @@ -1423,11 +1422,6 @@ represents that the item count is stored at the begining of the
character list. The item count has the same width as the items
in the character list (e.g. 16 bit for Low16 and High16 lists). */
#define XCL_ITEM_COUNT_MASK 0x3
/* Shift and mask for getting alignment data. The items of a character
list are always naturally aligned. Adding this value to the byte position
of the XCL_LIST header ensures the required alignment of the items. */
#define XCL_ALIGNMENT_SHIFT 12
#define XCL_ALIGNMENT_MASK 0x3
/* Shift and flag for constructing character list items. The XCL_CHAR_END
is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT
can be used to encode / decode the character value stored in an item. */
Expand Down Expand Up @@ -2199,8 +2193,9 @@ extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL);
extern BOOL _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR,
const uint8_t *, BOOL);

/* This function is needed only when memmove() is not available. */

Expand Down
6 changes: 4 additions & 2 deletions src/pcre2_intmodedep.h
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,7 @@ typedef struct pcre2_real_code {
void *executable_jit; /* Pointer to JIT code */
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
Expand Down Expand Up @@ -786,8 +787,9 @@ typedef struct compile_block {
BOOL had_recurse; /* Had a pattern recursion or subroutine call */
BOOL dupnames; /* Duplicate names exist */
#ifdef SUPPORT_WIDE_CHARS
class_ranges* cranges; /* First class range. */
class_ranges* next_cranges; /* Next class range. */
class_ranges *cranges; /* First class range. */
class_ranges *next_cranges; /* Next class range. */
size_t char_lists_size; /* Current size of character lists */
#endif
} compile_block;

Expand Down
7 changes: 4 additions & 3 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -7778,8 +7778,7 @@ cc++;
#endif /* CODE_UNIT_WIDTH */

/* Align characters. */
next_char = (const uint8_t*)cc;
next_char += (type >> XCL_ALIGNMENT_SHIFT) & XCL_ALIGNMENT_MASK;
next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
type &= XCL_TYPE_MASK;

/* Estimate size. */
Expand Down Expand Up @@ -7851,6 +7850,7 @@ while (type > 0)
if (item_count == XCL_ITEM_COUNT_MASK)
{
READ_FROM_CHAR_LIST(item_count);
SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK);
}

while (item_count > 0)
Expand Down Expand Up @@ -7918,6 +7918,7 @@ while (type > 0)
}

SLJIT_ASSERT(range_count > 0 && range_count <= (est_range_count << 1));
SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
ranges->range_count = range_count;
}

Expand Down Expand Up @@ -14702,7 +14703,7 @@ memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
common->re = re;
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
rootbacktrack.cc = (PCRE2_SPTR)((uint8_t *)re + re->code_start);

#ifdef SUPPORT_UNICODE
common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
Expand Down
Loading
Loading