Skip to content

Commit 8d83b7d

Browse files
authored
gh-139156: Optimize the UTF-7 encoder (#139253)
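Remove the base64SetO and base64WhiteSpace parameters of _PyUnicode_EncodeUTF7(); the caller updated in this commit passed 0 for both, so ENCODE_DIRECT no longer needs to test them.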
1 parent c7b11b7 commit 8d83b7d

File tree

3 files changed

+6
-13
lines changed

3 files changed

+6
-13
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,6 @@ extern int _PyUnicodeWriter_FormatV(
9292

9393
extern PyObject* _PyUnicode_EncodeUTF7(
9494
PyObject *unicode, /* Unicode object */
95-
int base64SetO, /* Encode RFC2152 Set O characters in base64 */
96-
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
9795
const char *errors); /* error handling */
9896

9997
/* --- UTF-8 Codecs ------------------------------------------------------- */

Modules/_codecsmodule.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
671671
const char *errors)
672672
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
673673
{
674-
return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
674+
return codec_tuple(_PyUnicode_EncodeUTF7(str, errors),
675675
PyUnicode_GET_LENGTH(str));
676676
}
677677

Objects/unicodeobject.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4670,15 +4670,12 @@ char utf7_category[128] = {
46704670

46714671
/* ENCODE_DIRECT: this character should be encoded as itself. The
46724672
* answer depends on whether we are encoding set O as itself, and also
4673-
* on whether we are encoding whitespace as itself. RFC2152 makes it
4673+
* on whether we are encoding whitespace as itself. RFC 2152 makes it
46744674
* clear that the answers to these questions vary between
46754675
* applications; here both are always encoded as themselves. */
46764676

4677-
#define ENCODE_DIRECT(c, directO, directWS) \
4678-
((c) < 128 && (c) > 0 && \
4679-
((utf7_category[(c)] == 0) || \
4680-
(directWS && (utf7_category[(c)] == 2)) || \
4681-
(directO && (utf7_category[(c)] == 1))))
4677+
#define ENCODE_DIRECT(c) \
4678+
((c) < 128 && (c) > 0 && ((utf7_category[(c)] != 3)))
46824679

46834680
PyObject *
46844681
PyUnicode_DecodeUTF7(const char *s,
@@ -4895,8 +4892,6 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
48954892

48964893
PyObject *
48974894
_PyUnicode_EncodeUTF7(PyObject *str,
4898-
int base64SetO,
4899-
int base64WhiteSpace,
49004895
const char *errors)
49014896
{
49024897
Py_ssize_t len = PyUnicode_GET_LENGTH(str);
@@ -4923,7 +4918,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
49234918
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
49244919

49254920
if (inShift) {
4926-
if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
4921+
if (ENCODE_DIRECT(ch)) {
49274922
/* shifting out */
49284923
if (base64bits) { /* output remaining bits */
49294924
*out++ = TO_BASE64(base64buffer << (6-base64bits));
@@ -4947,7 +4942,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
49474942
*out++ = '+';
49484943
*out++ = '-';
49494944
}
4950-
else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
4945+
else if (ENCODE_DIRECT(ch)) {
49514946
*out++ = (char) ch;
49524947
}
49534948
else {

0 commit comments

Comments
 (0)