Skip to content

Commit b03b8be

Browse files
committed
ICU-21639 Added an internal utility class to streamline preflighting and heap-allocating a char buffer for a locale ID
and changed several internal methods in ULocale to use it, so that they work correctly on locale IDs that are longer than ULOC_FULLNAME_CAPACITY.
1 parent 4368f69 commit b03b8be

File tree

4 files changed

+152
-28
lines changed

4 files changed

+152
-28
lines changed

icu4c/source/common/loclikely.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,30 +1181,28 @@ _uloc_minimizeSubtags(const char* localeID,
11811181
}
11821182
}
11831183

1184-
static UBool
1184+
static int32_t
11851185
do_canonicalize(const char* localeID,
11861186
char* buffer,
11871187
int32_t bufferCapacity,
11881188
UErrorCode* err)
11891189
{
1190-
uloc_canonicalize(
1190+
int32_t canonicalizedSize = uloc_canonicalize(
11911191
localeID,
11921192
buffer,
11931193
bufferCapacity,
11941194
err);
11951195

11961196
if (*err == U_STRING_NOT_TERMINATED_WARNING ||
11971197
*err == U_BUFFER_OVERFLOW_ERROR) {
1198-
*err = U_ILLEGAL_ARGUMENT_ERROR;
1199-
1200-
return FALSE;
1198+
return canonicalizedSize;
12011199
}
12021200
else if (U_FAILURE(*err)) {
12031201

1204-
return FALSE;
1202+
return -1;
12051203
}
12061204
else {
1207-
return TRUE;
1205+
return canonicalizedSize;
12081206
}
12091207
}
12101208

@@ -1241,12 +1239,17 @@ static UBool
12411239
_ulocimp_addLikelySubtags(const char* localeID,
12421240
icu::ByteSink& sink,
12431241
UErrorCode* status) {
1244-
char localeBuffer[ULOC_FULLNAME_CAPACITY];
1245-
1246-
if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1247-
return _uloc_addLikelySubtags(localeBuffer, sink, status);
1242+
PreflightingLocaleIDBuffer localeBuffer;
1243+
do {
1244+
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1245+
localeBuffer.getCapacity(), status);
1246+
} while (localeBuffer.needToTryAgain(status));
1247+
1248+
if (U_SUCCESS(*status)) {
1249+
return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
1250+
} else {
1251+
return FALSE;
12481252
}
1249-
return FALSE;
12501253
}
12511254

12521255
U_CAPI void U_EXPORT2
@@ -1289,11 +1292,13 @@ U_CAPI void U_EXPORT2
12891292
ulocimp_minimizeSubtags(const char* localeID,
12901293
icu::ByteSink& sink,
12911294
UErrorCode* status) {
1292-
char localeBuffer[ULOC_FULLNAME_CAPACITY];
1293-
1294-
if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1295-
_uloc_minimizeSubtags(localeBuffer, sink, status);
1296-
}
1295+
PreflightingLocaleIDBuffer localeBuffer;
1296+
do {
1297+
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
1298+
localeBuffer.getCapacity(), status);
1299+
} while (localeBuffer.needToTryAgain(status));
1300+
1301+
_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
12971302
}
12981303

12991304
// Pairs of (language subtag, + or -) for finding out fast if common languages

icu4c/source/common/uloc.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -478,15 +478,19 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
478478
/* Test if the locale id has BCP47 u extension and does not have '@' */
479479
/* Every use below refers to the macro's own parameter, so the result does not
   depend on a variable named localeID being in scope at the call site. */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480480
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
481-
#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
482-
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
483-
U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
484-
finalID=id; \
485-
if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
486-
} else { \
487-
finalID=buffer; \
488-
} \
489-
} UPRV_BLOCK_MACRO_END
481+
static int32_t _ConvertBCP47(
482+
const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) {
483+
int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
484+
if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
485+
finalID=id;
486+
if (*err == U_STRING_NOT_TERMINATED_WARNING) {
487+
*err = U_BUFFER_OVERFLOW_ERROR;
488+
}
489+
} else {
490+
finalID=buffer;
491+
}
492+
return localeIDSize;
493+
}
490494
/* Gets the size of the shortest subtag in the given localeID. */
491495
static int32_t getShortestSubtagLength(const char *localeID) {
492496
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
@@ -1474,7 +1478,7 @@ _canonicalize(const char* localeID,
14741478
uint32_t options,
14751479
UErrorCode* err) {
14761480
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1477-
char tempBuffer[ULOC_FULLNAME_CAPACITY];
1481+
PreflightingLocaleIDBuffer tempBuffer;
14781482
const char* origLocaleID;
14791483
const char* tmpLocaleID;
14801484
const char* keywordAssign = NULL;
@@ -1485,7 +1489,10 @@ _canonicalize(const char* localeID,
14851489
}
14861490

14871491
if (_hasBCP47Extension(localeID)) {
1488-
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1492+
do {
1493+
tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
1494+
tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
1495+
} while (tempBuffer.needToTryAgain(err));
14891496
} else {
14901497
if (localeID==NULL) {
14911498
localeID=uloc_getDefault();

icu4c/source/common/ulocimp.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,72 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
307307
// Return true if the value is already canonicalized.
308308
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
309309

310+
/**
311+
* A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
312+
* This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
313+
* and then, if it's not big enough, reallocate it on the heap and try again.
314+
*
315+
* You use it like this:
316+
* UErrorCode err = U_ZERO_ERROR;
317+
*
318+
* PreflightingLocaleIDBuffer tempBuffer;
319+
* do {
320+
* tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
321+
* } while (tempBuffer.needToTryAgain(&err));
322+
* if (U_SUCCESS(err)) {
323+
* uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
324+
* }
325+
*/
326+
class PreflightingLocaleIDBuffer {
327+
private:
328+
char stackBuffer[ULOC_FULLNAME_CAPACITY];
329+
char* heapBuffer = nullptr;
330+
int32_t capacity = ULOC_FULLNAME_CAPACITY;
331+
332+
public:
333+
int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
334+
335+
// No heap allocation. Use only on the stack.
336+
static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
337+
static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
338+
#if U_HAVE_PLACEMENT_NEW
339+
static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
340+
#endif
341+
342+
PreflightingLocaleIDBuffer() {}
343+
344+
~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
345+
346+
char* getBuffer() {
347+
if (heapBuffer == nullptr) {
348+
return stackBuffer;
349+
} else {
350+
return heapBuffer;
351+
}
352+
}
353+
354+
int32_t getCapacity() {
355+
return capacity;
356+
}
357+
358+
bool needToTryAgain(UErrorCode* err) {
359+
if (heapBuffer != nullptr) {
360+
return false;
361+
}
362+
363+
if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
364+
int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia
365+
heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
366+
if (heapBuffer == nullptr) {
367+
*err = U_MEMORY_ALLOCATION_ERROR;
368+
} else {
369+
*err = U_ZERO_ERROR;
370+
capacity = newCapacity;
371+
}
372+
return U_SUCCESS(*err);
373+
}
374+
return false;
375+
}
376+
};
377+
310378
#endif

icu4c/source/test/cintltst/cloctst.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ static void TestBug20370(void);
5858
static void TestBug20321UnicodeLocaleKey(void);
5959

6060
static void TestUsingDefaultWarning(void);
61+
static void TestExcessivelyLongIDs(void);
6162

6263
void PrintDataTable();
6364

@@ -281,6 +282,7 @@ void addLocaleTest(TestNode** root)
281282
TESTCASE(TestBug20321UnicodeLocaleKey);
282283
TESTCASE(TestUsingDefaultWarning);
283284
TESTCASE(TestBug21449InfiniteLoop);
285+
TESTCASE(TestExcessivelyLongIDs);
284286
}
285287

286288

@@ -7009,3 +7011,45 @@ static void TestBug21449InfiniteLoop() {
70097011
// so the test is considered passed if the call to the API below returns anything at all.
70107012
uloc_getDisplayLanguage(invalidLocaleId, invalidLocaleId, NULL, 0, &status);
70117013
}
7014+
7015+
// rdar://79296849 and https://unicode-org.atlassian.net/browse/ICU-21639
7016+
static void TestExcessivelyLongIDs(void) {
7017+
const char* reallyLongID =
7018+
"de-u-cu-eur-em-default-hc-h23-ks-level1-lb-strict-lw-normal-ms-metric"
7019+
"-nu-latn-rg-atzzzz-sd-atat1-ss-none-tz-atvie-va-posix";
7020+
char minimizedID[ULOC_FULLNAME_CAPACITY];
7021+
char maximizedID[ULOC_FULLNAME_CAPACITY];
7022+
int32_t actualMinimizedLength = 0;
7023+
int32_t actualMaximizedLength = 0;
7024+
UErrorCode err = U_ZERO_ERROR;
7025+
7026+
actualMinimizedLength = uloc_minimizeSubtags(reallyLongID, minimizedID, ULOC_FULLNAME_CAPACITY, &err);
7027+
assertTrue("uloc_minimizeSubtags() with too-small buffer didn't fail as expected",
7028+
U_FAILURE(err) && actualMinimizedLength > ULOC_FULLNAME_CAPACITY);
7029+
7030+
err = U_ZERO_ERROR;
7031+
actualMaximizedLength = uloc_addLikelySubtags(reallyLongID, maximizedID, ULOC_FULLNAME_CAPACITY, &err);
7032+
assertTrue("uloc_addLikelySubtags() with too-small buffer didn't fail as expected",
7033+
U_FAILURE(err) && actualMaximizedLength > ULOC_FULLNAME_CAPACITY);
7034+
7035+
err = U_ZERO_ERROR;
7036+
char* realMinimizedID = (char*)uprv_malloc(actualMinimizedLength + 1);
7037+
uloc_minimizeSubtags(reallyLongID, realMinimizedID, actualMinimizedLength + 1, &err);
7038+
if (assertSuccess("uloc_minimizeSubtags() failed", &err)) {
7039+
assertEquals("Wrong result from uloc_minimizeSubtags()",
7040+
"de__POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
7041+
"lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
7042+
realMinimizedID);
7043+
}
7044+
uprv_free(realMinimizedID);
7045+
7046+
char* realMaximizedID = (char*)uprv_malloc(actualMaximizedLength + 1);
7047+
uloc_addLikelySubtags(reallyLongID, realMaximizedID, actualMaximizedLength + 1, &err);
7048+
if (assertSuccess("uloc_addLikelySubtags() failed", &err)) {
7049+
assertEquals("Wrong result from uloc_addLikelySubtags()",
7050+
"de_Latn_DE_POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
7051+
"lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
7052+
realMaximizedID);
7053+
}
7054+
uprv_free(realMaximizedID);
7055+
}

0 commit comments

Comments
 (0)