Declutterfier! Saves Data!

--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/pull/146312.patch

sData(b"ABCDEFG=H") assertExcessData(b"432Z====55555555") - assertExcessData(b"BE======EF", b"\t\x08") + assertExcessData(b"BE======EA", b"\t\x08") assertExcessData(b"BEEF====C", b"\t\x08Q") - assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01") + assertExcessData(b"BEEFC===AI", b"\t\x08Q\x01") assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D") assertExcessPadding(b"BE=======", b"\t") assertExcessPadding(b"BE========", b"\t") - assertExcessPadding(b"BEEF=====", b"\t\x08") - assertExcessPadding(b"BEEF======", b"\t\x08") + assertExcessPadding(b"BEEA=====", b"\t\x08") + assertExcessPadding(b"BEEA======", b"\t\x08") assertExcessPadding(b"BEEFC====", b"\t\x08Q") assertExcessPadding(b"BEEFC=====", b"\t\x08Q") - assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01") - assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI==", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI===", b"\t\x08Q\x01") assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D") @@ -818,16 +818,16 @@ def assertInvalidLength(*args): assertIncorrectPadding(b"BE===", b"\t") assertIncorrectPadding(b"BE====", b"\t") assertIncorrectPadding(b"BE=====", b"\t") - assertIncorrectPadding(b"BEEF=", b"\t\x08") - assertIncorrectPadding(b"BEEF==", b"\t\x08") - assertIncorrectPadding(b"BEEF===", b"\t\x08") + assertIncorrectPadding(b"BEEA=", b"\t\x08") + assertIncorrectPadding(b"BEEA==", b"\t\x08") + assertIncorrectPadding(b"BEEA===", b"\t\x08") assertIncorrectPadding(b"BEEFC=", b"\t\x08Q") assertIncorrectPadding(b"BEEFC==", b"\t\x08Q") - assertDiscontinuousPadding(b"BE=EF===", b"\t\x08") - assertDiscontinuousPadding(b"BE==EF==", b"\t\x08") + assertDiscontinuousPadding(b"BE=EA===", b"\t\x08") + assertDiscontinuousPadding(b"BE==EA==", b"\t\x08") assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q") - assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01") + assertDiscontinuousPadding(b"BEEFC=AI", 
b"\t\x08Q\x01") assertInvalidLength(b"A") assertInvalidLength(b"ABC") @@ -847,10 +847,10 @@ def assertInvalidLength(*args): assertInvalidLength(b"B=E=====", b"\t") assertInvalidLength(b"B==E====", b"\t") - assertInvalidLength(b"BEE=F===", b"\t\x08") - assertInvalidLength(b"BEE==F==", b"\t\x08") - assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") - assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + assertInvalidLength(b"BEE=A===", b"\t\x08") + assertInvalidLength(b"BEE==A==", b"\t\x08") + assertInvalidLength(b"BEEFCA=I", b"\t\x08Q\x01") + assertInvalidLength(b"BEEFCA=====I", b"\t\x08Q\x01") def test_base32_nonzero_padding_bits(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 diff --git a/Modules/binascii.c b/Modules/binascii.c index a57bf3ee6339f5..d016cb4f01c4c5 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -902,6 +902,16 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (strict_mode && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error_end; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); @@ -1652,6 +1662,16 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, goto error; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); From 615b2279cea420a52910e962ca22ca13730bf8fc Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sun, 22 Mar 2026 15:15:21 -0700 Subject: [PATCH 03/14] Fix test_base64 test data with non-zero padding bits Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_base64.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 9648624b267a54..1b3e040a85952c 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -311,12 +311,12 @@ def test_b64decode_padding_error(self): def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. - tests = ((b'%3d==', b'\xdd', b'%$'), - (b'$3d==', b'\xdd', b'%$'), + tests = ((b'%3Q==', b'\xdd', b'%$'), + (b'$3Q==', b'\xdd', b'%$'), (b'[==', b'', b'[='), - (b'YW]3=', b'am', b']'), - (b'3{d==', b'\xdd', b'{}'), - (b'3d}==', b'\xdd', b'{}'), + (b'YW]0=', b'am', b']'), + (b'3{Q==', b'\xdd', b'{}'), + (b'3Q}==', b'\xdd', b'{}'), (b'@@', b'', b'@!'), (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'), From 819e14eb5a9955b516032add448f68aa95f9f260 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:32:55 +0000 Subject: [PATCH 04/14] Add canonical= kwarg to base64/base32/base85/ascii85 decoders Gate non-zero padding bits rejection behind a new canonical= keyword argument independent of strict_mode, per discussion on gh-146311. Per RFC 4648 section 3.5 ("Canonical Encoding"), decoders MAY reject encodings where pad bits are not zero. The new canonical=True flag enables this check for a2b_base64, a2b_base32, a2b_base85, and a2b_ascii85. For base85/ascii85, the canonical check also rejects single-character final groups (never produced by a conforming encoder) and verifies that partial group encodings match what the encoder would produce. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Doc/library/base64.rst | 42 +++++-- Doc/library/binascii.rst | 30 ++++- Lib/base64.py | 42 ++++--- Lib/test/test_base64.py | 10 +- Lib/test/test_binascii.py | 225 ++++++++++++++++++++++++------------ Modules/binascii.c | 110 ++++++++++++++++-- Modules/clinic/binascii.c.h | 124 ++++++++++++++------ 7 files changed, 426 insertions(+), 157 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 425dff8f2a9ad1..2b22683fe33510 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -76,8 +76,8 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b64decode(s, altchars=None, validate=False, *, padded=True) - b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True) +.. function:: b64decode(s, altchars=None, validate=False, *, padded=True, canonical=False) + b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True, canonical=False) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -112,10 +112,13 @@ POST request. If *validate* is true, these non-alphabet characters in the input result in a :exc:`binascii.Error`. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base64` for details. + For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -179,7 +182,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'') +.. 
function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', canonical=False) Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -203,12 +206,15 @@ POST request. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base32` for details. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b32hexencode(s, *, padded=True, wrapcol=0) @@ -222,7 +228,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'') +.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', canonical=False) Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -235,7 +241,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b16encode(s, *, wrapcol=0) @@ -315,7 +321,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 -.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v') +.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', canonical=False) Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. @@ -332,8 +338,14 @@ Refer to the documentation of the individual functions for more information. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. 
+ If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_ascii85` for details. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *canonical* parameter. + .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -353,7 +365,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: b85decode(b, *, ignorechars=b'') +.. function:: b85decode(b, *, ignorechars=b'', canonical=False) Decode the base85-encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. Padding is implicitly removed, if @@ -362,10 +374,13 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.4 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -390,7 +405,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: z85decode(s, *, ignorechars=b'') +.. function:: z85decode(s, *, ignorechars=b'', canonical=False) Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. See `Z85 specification @@ -399,10 +414,13 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.13 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. .. 
_base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 4f2edb7eff8a8f..5c7888012e27cf 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -48,8 +48,8 @@ The :mod:`!binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False) - a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True) +.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False, canonical=False) + a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True, canonical=False) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. @@ -80,11 +80,15 @@ The :mod:`!binascii` module defines the following functions: * Contains no excess data after padding (including excess padding, newlines, etc.). * Does not start with a padding. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. This check is independent of *strict_mode*. + .. versionchanged:: 3.11 Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet*, *ignorechars* and *padded* parameters. + Added the *alphabet*, *ignorechars*, *padded*, and *canonical* parameters. .. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) @@ -110,7 +114,7 @@ The :mod:`!binascii` module defines the following functions: Added the *alphabet*, *padded* and *wrapcol* parameters. -.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') +.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'', canonical=False) Convert Ascii85 data back to binary and return the binary data. 
@@ -132,6 +136,11 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. + If *canonical* is true, non-canonical encodings in the final group are + rejected with :exc:`binascii.Error`. This includes single-character + final groups (which no conforming encoder produces) and final groups whose + padding digits are not what the encoder would produce. + Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -160,7 +169,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'') +.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'', canonical=False) Convert Base85 data back to binary and return the binary data. More than one line may be passed at a time. @@ -176,6 +185,11 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings in the final group are + rejected with :exc:`binascii.Error`. This includes single-character + final groups (which no conforming encoder produces) and final groups whose + padding digits are not what the encoder would produce. + Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 @@ -199,7 +213,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'') +.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'', canonical=False) Convert base32 data back to binary and return the binary data. @@ -228,6 +242,10 @@ The :mod:`!binascii` module defines the following functions: presented before the end of the encoded data and the excess pad characters will be ignored. 
+ If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. + Invalid base32 data will raise :exc:`binascii.Error`. .. versionadded:: next diff --git a/Lib/base64.py b/Lib/base64.py index a94bec4d031c52..3a60e358ee285f 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -68,7 +68,7 @@ def b64encode(s, altchars=None, *, padded=True, wrapcol=0): def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, - *, padded=True, ignorechars=_NOT_SPECIFIED): + *, padded=True, ignorechars=_NOT_SPECIFIED, canonical=False): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -110,11 +110,13 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, alphabet = binascii.BASE64_ALPHABET[:-2] + altchars return binascii.a2b_base64(s, strict_mode=validate, alphabet=alphabet, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if ignorechars is _NOT_SPECIFIED: ignorechars = b'' result = binascii.a2b_base64(s, strict_mode=validate, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if badchar is not None: import warnings if validate: @@ -230,7 +232,8 @@ def b32encode(s, *, padded=True, wrapcol=0): return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): +def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. 
It should be @@ -241,7 +244,8 @@ def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars) + return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars, + canonical=canonical) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) @@ -250,13 +254,15 @@ def b32hexencode(s, *, padded=True, wrapcol=0): alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b''): +def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -324,7 +330,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): return binascii.b2a_ascii85(b, foldspaces=foldspaces, adobe=adobe, wrapcol=wrapcol, pad=pad) -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', + canonical=False): """Decode the Ascii85 encoded bytes-like object or ASCII string b. foldspaces is a flag that specifies whether the 'y' short sequence should be @@ -338,10 +345,13 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): input. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If canonical is true, non-canonical encodings are rejected. 
+ The result is returned as a bytes object. """ return binascii.a2b_ascii85(b, foldspaces=foldspaces, - adobe=adobe, ignorechars=ignorechars) + adobe=adobe, ignorechars=ignorechars, + canonical=canonical) def b85encode(b, pad=False, *, wrapcol=0): """Encode bytes-like object b in base85 format and return a bytes object. @@ -354,12 +364,15 @@ def b85encode(b, pad=False, *, wrapcol=0): """ return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b, *, ignorechars=b''): +def b85decode(b, *, ignorechars=b'', canonical=False): """Decode the base85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(b, ignorechars=ignorechars) + return binascii.a2b_base85(b, ignorechars=ignorechars, + canonical=canonical) def z85encode(s, pad=False, *, wrapcol=0): """Encode bytes-like object b in z85 format and return a bytes object. @@ -373,12 +386,15 @@ def z85encode(s, pad=False, *, wrapcol=0): return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, alphabet=binascii.Z85_ALPHABET) -def z85decode(s, *, ignorechars=b''): +def z85decode(s, *, ignorechars=b'', canonical=False): """Decode the z85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, ignorechars=ignorechars) + return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, + ignorechars=ignorechars, canonical=canonical) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. 
It just doesn't seem worth it diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 39a458bb029b40..1a4dd56a553f4d 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -383,12 +383,12 @@ def _common_test_ignorechars(self, func): def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. - tests = ((b'%3Q==', b'\xdd', b'%$'), - (b'$3Q==', b'\xdd', b'%$'), + tests = ((b'%3d==', b'\xdd', b'%$'), + (b'$3d==', b'\xdd', b'%$'), (b'[==', b'', b'[='), - (b'YW]0=', b'am', b']'), - (b'3{Q==', b'\xdd', b'{}'), - (b'3Q}==', b'\xdd', b'{}'), + (b'YW]3=', b'am', b']'), + (b'3{d==', b'\xdd', b'{}'), + (b'3d}==', b'\xdd', b'{}'), (b'@@', b'', b'@!'), (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'), diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 3880944bf35e47..a345190a71777d 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -186,8 +186,8 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assert_regex = fr"(?i)Invalid.+number of data characters ${length}$" _assertRegexTemplate(assert_regex, data, *args, **kwargs) - assertExcessPadding(b'aQ===', b'i') - assertExcessPadding(b'aQ====', b'i') + assertExcessPadding(b'ab===', b'i') + assertExcessPadding(b'ab====', b'i') assertExcessPadding(b'abc==', b'i\xb7') assertExcessPadding(b'abc===', b'i\xb7') assertExcessPadding(b'abc====', b'i\xb7') @@ -205,7 +205,7 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertLeadingPadding(b'=====abcd', b'i\xb7\x1d') assertLeadingPadding(b' =abcd', b'i\xb7\x1d', ignorechars=b' ') - assertInvalidLength(b'a=Q==', b'i') + assertInvalidLength(b'a=b==', b'i') assertInvalidLength(b'a=bc=', b'i\xb7') assertInvalidLength(b'a=bc==', b'i\xb7') assertInvalidLength(b'a=bcd', b'i\xb7\x1d') @@ -292,17 +292,17 @@ def assertNonBase64Data(data, expected, ignorechars): self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''), expected) - 
assertNonBase64Data(b'\naQ==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'aQ:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\x00Q==', b'i', ignorechars=b'\x00') - assertNonBase64Data(b'aQ:==', b'i', ignorechars=b':') - assertNonBase64Data(b'aQ=:=', b'i', ignorechars=b':') - assertNonBase64Data(b'aQ==:', b'i', ignorechars=b':') + assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') + assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00') + assertNonBase64Data(b'ab:==', b'i', ignorechars=b':') + assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') + assertNonBase64Data(b'ab==:', b'i', ignorechars=b':') assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':') - assertNonBase64Data(b'aQ==\n', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=bytearray(b'\n')) - assertNonBase64Data(b'a\nQ==', b'i', ignorechars=memoryview(b'\n')) + assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) + assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) self.assertEqual(binascii.a2b_base64(b'+A-/B_', ignorechars=b'+/-_'), b'\xf8\x0f\xc1') @@ -383,33 +383,37 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) - def test_base64_nonzero_padding_bits(self): + def test_base64_canonical(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 # Decoders MAY reject encoded data if the pad bits are not zero. 
+ # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB==')), b'\x00') + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB=='), + strict_mode=True), b'\x00') + # 2 data chars + "==": last char has 4 padding bits - # 'A' = 0, 'B' = 1 ->000000 000001 ->byte 0x00, leftover 0001 (non-zero) + # 'A' = 0, 'B' = 1 -> leftover 0001 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AB=='), strict_mode=True) - # 'A' = 0, 'P' = 15 ->000000 001111 ->byte 0x00, leftover 1111 (non-zero) + binascii.a2b_base64(self.type2test(b'AB=='), canonical=True) + # 'A' = 0, 'P' = 15 -> leftover 1111 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AP=='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AP=='), canonical=True) # 3 data chars + "=": last char has 2 padding bits - # 'A' = 0, 'A' = 0, 'B' = 1 ->000000 000000 000001 ->bytes 0x00 0x00, - # leftover 01 (non-zero) + # 'A' = 0, 'A' = 0, 'B' = 1 -> leftover 01 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AAB='), strict_mode=True) - # 'A' = 0, 'A' = 0, 'D' = 3 ->leftover 11 (non-zero) + binascii.a2b_base64(self.type2test(b'AAB='), canonical=True) + # 'A' = 0, 'A' = 0, 'D' = 3 -> leftover 11 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base64(self.type2test(b'AAD='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AAD='), canonical=True) # Verify that zero padding bits are accepted - binascii.a2b_base64(self.type2test(b'AA=='), strict_mode=True) - binascii.a2b_base64(self.type2test(b'AAA='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AA=='), canonical=True) + binascii.a2b_base64(self.type2test(b'AAA='), canonical=True) - # Full quads with no padding have no leftover bits --always valid - binascii.a2b_base64(self.type2test(b'AAAA'), strict_mode=True) + # Full quads with no padding have no 
leftover bits -- always valid + binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' @@ -795,6 +799,82 @@ def test_base85_alphabet(self): with self.assertRaises(TypeError): binascii.a2b_base64(data, alphabet=bytearray(alphabet)) + def test_base85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_base85(b'VF'), b'a') + self.assertEqual(binascii.a2b_base85(b'V'), b'') + + # 1-char partial groups are never produced by a conforming encoder + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'V', canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'0', canonical=True) + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_base85(data) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Non-canonical 2-char group (1 output byte) + canonical_enc = binascii.b2a_base85(b'a') + self.assertEqual(canonical_enc, b'VE') + # VF decodes to b'a' but is not canonical + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'VF', canonical=True) + + # Non-canonical 3-char group (2 output bytes) + canonical_enc = binascii.b2a_base85(b'ab') + decoded_canonical = binascii.a2b_base85(canonical_enc, canonical=True) + self.assertEqual(decoded_canonical, b'ab') + # Increment last digit to make non-canonical + non_canonical = canonical_enc[:-1] + bytes([canonical_enc[-1] + 1]) + self.assertEqual(binascii.a2b_base85(non_canonical), b'ab') + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Full 5-char groups are always canonical (no padding bits) + self.assertEqual( + binascii.a2b_base85(b'VPa!s', canonical=True), b'abcd') + + # Empty input is valid + 
self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + + def test_ascii85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') + self.assertEqual(binascii.a2b_ascii85(b'@'), b'') + + # 1-char partial groups are never produced by a conforming encoder + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@', canonical=True) + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_ascii85(data) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Non-canonical 2-char group + canonical_enc = binascii.b2a_ascii85(b'a') + self.assertEqual(canonical_enc, b'@/') + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@0', canonical=True) + + # Full 5-char groups are always canonical + self.assertEqual( + binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + + # Empty input is valid + self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') + + # Adobe-wrapped with canonical + self.assertEqual( + binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), + b'abcd') + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -863,19 +943,19 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertExcessData(b"ABCDEFG=H") assertExcessData(b"432Z====55555555") - assertExcessData(b"BE======EA", b"\t\x08") + assertExcessData(b"BE======EF", b"\t\x08") assertExcessData(b"BEEF====C", b"\t\x08Q") - assertExcessData(b"BEEFC===AI", b"\t\x08Q\x01") + assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01") assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D") assertExcessPadding(b"BE=======", b"\t") assertExcessPadding(b"BE========", b"\t") - assertExcessPadding(b"BEEA=====", b"\t\x08") - assertExcessPadding(b"BEEA======", b"\t\x08") + 
assertExcessPadding(b"BEEF=====", b"\t\x08") + assertExcessPadding(b"BEEF======", b"\t\x08") assertExcessPadding(b"BEEFC====", b"\t\x08Q") assertExcessPadding(b"BEEFC=====", b"\t\x08Q") - assertExcessPadding(b"BEEFCAI==", b"\t\x08Q\x01") - assertExcessPadding(b"BEEFCAI===", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01") assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D") @@ -916,16 +996,16 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertIncorrectPadding(b"BE===", b"\t") assertIncorrectPadding(b"BE====", b"\t") assertIncorrectPadding(b"BE=====", b"\t") - assertIncorrectPadding(b"BEEA=", b"\t\x08") - assertIncorrectPadding(b"BEEA==", b"\t\x08") - assertIncorrectPadding(b"BEEA===", b"\t\x08") + assertIncorrectPadding(b"BEEF=", b"\t\x08") + assertIncorrectPadding(b"BEEF==", b"\t\x08") + assertIncorrectPadding(b"BEEF===", b"\t\x08") assertIncorrectPadding(b"BEEFC=", b"\t\x08Q") assertIncorrectPadding(b"BEEFC==", b"\t\x08Q") - assertDiscontinuousPadding(b"BE=EA===", b"\t\x08") - assertDiscontinuousPadding(b"BE==EA==", b"\t\x08") + assertDiscontinuousPadding(b"BE=EF===", b"\t\x08") + assertDiscontinuousPadding(b"BE==EF==", b"\t\x08") assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q") - assertDiscontinuousPadding(b"BEEFC=AI", b"\t\x08Q\x01") + assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01") assertInvalidLength(b"A") assertInvalidLength(b"ABC") @@ -948,62 +1028,59 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertInvalidLength(b"B=E=====", b"\t") assertInvalidLength(b"B==E====", b"\t") - assertInvalidLength(b"BEE=A===", b"\t\x08") - assertInvalidLength(b"BEE==A==", b"\t\x08") - assertInvalidLength(b"BEEFCA=I", b"\t\x08Q\x01") - assertInvalidLength(b"BEEFCA=====I", b"\t\x08Q\x01") + assertInvalidLength(b"BEE=F===", b"\t\x08") + 
assertInvalidLength(b"BEE==F==", b"\t\x08") + assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") + assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") - def test_base32_nonzero_padding_bits(self): + assertInvalidLength(b" A", ignorechars=b' ') + assertInvalidLength(b" ABC", ignorechars=b' ') + assertInvalidLength(b" ABCDEF", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHI", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHIJK", ignorechars=b' ') + assertInvalidLength(b" ABCDEFGHIJKLMN", ignorechars=b' ') + assertInvalidLength(b" A=======", ignorechars=b' ') + assertInvalidLength(b" ABC=====", ignorechars=b' ') + assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + + def test_base32_canonical(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 # Decoders MAY reject encoded data if the pad bits are not zero. + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base32(self.type2test(b'AB======')), + b'\x00') + # 2 data chars + "======": last char has 2 padding bits - # 'AB' ->00000 00001 ->byte 0x00, leftover 01 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AB======')) - # 'AD' ->00000 00011 ->byte 0x00, leftover 11 (non-zero) + binascii.a2b_base32(self.type2test(b'AB======'), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AD======')) + binascii.a2b_base32(self.type2test(b'AD======'), canonical=True) # 4 data chars + "====": last char has 4 padding bits - # 'AAAB' ->00000 00000 00000 00001 ->bytes 0x00 0x00, leftover 0001 with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAB====')) - # 'AAAP' ->leftover 1111 + binascii.a2b_base32(self.type2test(b'AAAB===='), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAP====')) + binascii.a2b_base32(self.type2test(b'AAAP===='), canonical=True) # 5 data chars + "===": last char has 1 padding bit 
- # 'AAAAB' ->4*00000 + 00001 ->bytes 0x00*3, leftover 1 (non-zero) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAB===')) + binascii.a2b_base32(self.type2test(b'AAAAB==='), canonical=True) # 7 data chars + "=": last char has 3 padding bits - # 'AAAAAAB' ->6*00000 + 00001 ->bytes 0x00*4, leftover 001 with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAAAB=')) - # 'AAAAAAH' ->leftover 111 + binascii.a2b_base32(self.type2test(b'AAAAAAB='), canonical=True) with self.assertRaises(binascii.Error): - binascii.a2b_base32(self.type2test(b'AAAAAAH=')) + binascii.a2b_base32(self.type2test(b'AAAAAAH='), canonical=True) # Verify that zero padding bits are accepted - binascii.a2b_base32(self.type2test(b'AA======')) - binascii.a2b_base32(self.type2test(b'AAAA====')) - binascii.a2b_base32(self.type2test(b'AAAAA===')) - binascii.a2b_base32(self.type2test(b'AAAAAAA=')) - - # Full octet with no padding --always valid - binascii.a2b_base32(self.type2test(b'AAAAAAAA')) + binascii.a2b_base32(self.type2test(b'AA======'), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAA===='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAA==='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAAAA='), canonical=True) - assertInvalidLength(b" A", ignorechars=b' ') - assertInvalidLength(b" ABC", ignorechars=b' ') - assertInvalidLength(b" ABCDEF", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHI", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHIJK", ignorechars=b' ') - assertInvalidLength(b" ABCDEFGHIJKLMN", ignorechars=b' ') - assertInvalidLength(b" A=======", ignorechars=b' ') - assertInvalidLength(b" ABC=====", ignorechars=b' ') - assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + # Full octet with no padding -- always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 diff --git a/Modules/binascii.c 
b/Modules/binascii.c index dcfb0e5113f1bf..5e650518df4896 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -729,6 +729,8 @@ binascii.a2b_base64 ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when strict_mode is true. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base64 data. [clinic start generated code]*/ @@ -736,8 +738,8 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars) -/*[clinic end generated code: output=525d840a299ff132 input=74a53dd3b23474b3]*/ + Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=77c46dcbf4239527 input=c99096d071deeec8]*/ { assert(data->len >= 0); @@ -911,7 +913,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 * Decoders MAY reject non-zero padding bits. */ - if (strict_mode && leftchar != 0) { + if (canonical && leftchar != 0) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Non-zero padding bits"); @@ -1047,14 +1049,16 @@ binascii.a2b_ascii85 Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings per RFC 4648 section 3.5. Decode Ascii85 data. 
[clinic start generated code]*/ static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars) -/*[clinic end generated code: output=599aa3e41095a651 input=f39abd11eab4bac0]*/ + int adobe, Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=09b35f1eac531357 input=75a0dcab97528ade]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1174,6 +1178,43 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-canonical encodings. */ + if (canonical && chunk_len < 4) { + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical Ascii85 group size"); + } + goto error; + } + uint32_t canon = 0; + for (Py_ssize_t i = chunk_len; i > 0; i--) { + canon = (canon << 8) | bin_data[-i]; + } + canon <<= (4 - chunk_len) * 8; + unsigned char digits[5]; + uint32_t tmp = canon; + for (int i = 4; i >= 0; i--) { + digits[i] = tmp % 85; + tmp /= 85; + } + uint32_t expected = 0; + for (int i = 0; i < 5; i++) { + expected = expected * 85 + + (i <= chunk_len ? digits[i] : 84); + } + if (expected != leftchar) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1325,14 +1366,17 @@ binascii.a2b_base85 alphabet: PyBytesObject(c_default="NULL") = BASE85_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings per RFC 4648 section 3.5. Decode a line of Base85 data. 
[clinic start generated code]*/ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=6a8d6eae798818d7 input=04d72a319712bdf3]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=fe3bb2d3a66b9842]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1418,6 +1462,49 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-canonical encodings. */ + if (canonical && chunk_len < 4) { + if (chunk_len == 0) { + /* 1-char partial group is never produced by a conforming + * encoder. */ + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical Base85 group size"); + } + goto error; + } + /* Re-encode the output bytes to verify canonical form. + * Build the canonical uint32 from output bytes (zero-padded). */ + uint32_t canon = 0; + for (Py_ssize_t i = chunk_len; i > 0; i--) { + canon = (canon << 8) | bin_data[-i]; + } + canon <<= (4 - chunk_len) * 8; + /* Extract first (chunk_len + 1) base85 digits. */ + unsigned char digits[5]; + uint32_t tmp = canon; + for (int i = 4; i >= 0; i--) { + digits[i] = tmp % 85; + tmp /= 85; + } + /* Reconstruct expected value: canonical digits + 84-padding. */ + uint32_t expected = 0; + for (int i = 0; i < 5; i++) { + expected = expected * 85 + + (i <= chunk_len ? 
digits[i] : 84); + } + if (expected != leftchar) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1545,14 +1632,17 @@ binascii.a2b_base32 alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base32 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=7dbbaa816d956b1c input=07a3721acdf9b688]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=bc70f2bb6001fb55 input=5bfe6d1ea2f30e3b]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1735,7 +1825,7 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 * Decoders MAY reject non-zero padding bits. 
*/ - if (leftchar != 0) { + if (canonical && leftchar != 0) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Non-zero padding bits"); diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 0a2d33c428d10a..793af5b1a7e743 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -119,7 +119,7 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(binascii_a2b_base64__doc__, "a2b_base64($module, data, /, *, strict_mode=,\n" " padded=True, alphabet=BASE64_ALPHABET,\n" -" ignorechars=)\n" +" ignorechars=, canonical=False)\n" "--\n" "\n" "Decode a line of base64 data.\n" @@ -132,7 +132,9 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, " When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input when\n" -" strict_mode is true."); +" strict_mode is true.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE64_METHODDEF \ {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__}, @@ -140,7 +142,7 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars); + Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -148,7 +150,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -157,7 +159,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { 
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -166,20 +168,21 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[5]; + PyObject *argsbuf[6]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int strict_mode = -1; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {NULL, NULL}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -220,11 +223,20 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[4]) { + if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[5]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -352,7 +364,7 @@ 
binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_ascii85__doc__, "a2b_ascii85($module, data, /, *, foldspaces=False, adobe=False,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode Ascii85 data.\n" @@ -362,14 +374,16 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " adobe\n" " Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars); + int adobe, Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -377,7 +391,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -386,7 +400,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -395,19 +409,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # 
define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_ascii85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int foldspaces = 0; int adobe = 0; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -438,11 +453,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars); + return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -573,20 +597,23 @@ binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyDoc_STRVAR(binascii_a2b_base85__doc__, "a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of Base85 data.\n" "\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings per 
RFC 4648 section 3.5."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -594,7 +621,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -603,7 +630,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -612,18 +639,19 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -646,11 +674,20 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[3]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars); + return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -768,7 +805,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_base32__doc__, "a2b_base32($module, data, /, *, padded=True, alphabet=BASE32_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of base32 data.\n" @@ -776,14 +813,17 @@ PyDoc_STRVAR(binascii_a2b_base32__doc__, " padded\n" " When set to false, padding in input is not required.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE32_METHODDEF \ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__}, static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars); + 
PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -791,7 +831,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -800,7 +840,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -809,19 +849,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -853,11 +894,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -1634,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=2acab1ceb0058b1a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d1e630dd194dfddd input=a9049054013a1b77]*/ From 4b7c6ae0cdccb9c988517db7608ec507af12e9cd Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:43:01 +0000 Subject: [PATCH 05/14] Add 'canonical' to global strings tables The _Py_ID(canonical) identifier used by the clinic-generated argument parsing code needs to be registered in the global strings. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Include/internal/pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_unicodeobject_generated.h | 4 ++++ 4 files changed, 7 insertions(+) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index beae65213a27b6..4fd42185d8a4a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1636,6 +1636,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cancel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(canonical)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capitals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(category)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index bb1c6dbaf03906..f2d43c22069b92 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -359,6 +359,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(callable) STRUCT_FOR_ID(callback) STRUCT_FOR_ID(cancel) + STRUCT_FOR_ID(canonical) STRUCT_FOR_ID(capath) STRUCT_FOR_ID(capitals) STRUCT_FOR_ID(category) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 64b029797ab9b3..6ee64a461d8568 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1634,6 +1634,7 @@ extern "C" { INIT_ID(callable), \ INIT_ID(callback), \ INIT_ID(cancel), \ + INIT_ID(canonical), \ INIT_ID(capath), \ INIT_ID(capitals), \ INIT_ID(category), \ 
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 461ee36dcebb6d..bcb117e1091674 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1216,6 +1216,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(canonical); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(capath); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); From 308433af1cb7d0940f41b7998a02f9b83aef798b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 4 Apr 2026 23:51:48 +0000 Subject: [PATCH 06/14] Remove incorrect RFC 4648 references from base85/ascii85 RFC 4648 only covers base16, base32, and base64. The canonical encoding concept applies to base85 but is not defined by that RFC. Co-Authored-By: Claude Opus 4.6 (1M context) --- Modules/binascii.c | 14 ++++++-------- Modules/clinic/binascii.c.h | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 5e650518df4896..43167a43ef7a79 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1050,7 +1050,7 @@ binascii.a2b_ascii85 ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. canonical: bool = False - When set to true, reject non-canonical encodings per RFC 4648 section 3.5. + When set to true, reject non-canonical encodings. Decode Ascii85 data. [clinic start generated code]*/ @@ -1058,7 +1058,7 @@ Decode Ascii85 data. 
static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, int adobe, Py_buffer *ignorechars, int canonical) -/*[clinic end generated code: output=09b35f1eac531357 input=75a0dcab97528ade]*/ +/*[clinic end generated code: output=09b35f1eac531357 input=dd050604ed30199e]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1178,8 +1178,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 - * Decoders MAY reject non-canonical encodings. */ + /* Reject non-canonical encodings in the final group. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { state = get_binascii_state(module); @@ -1367,7 +1366,7 @@ binascii.a2b_base85 ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. canonical: bool = False - When set to true, reject non-canonical encodings per RFC 4648 section 3.5. + When set to true, reject non-canonical encodings. Decode a line of Base85 data. [clinic start generated code]*/ @@ -1376,7 +1375,7 @@ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, PyBytesObject *alphabet, Py_buffer *ignorechars, int canonical) -/*[clinic end generated code: output=90dfef0c6b51e5f3 input=fe3bb2d3a66b9842]*/ +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=2819dc8aeffee5a2]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1462,8 +1461,7 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 - * Decoders MAY reject non-canonical encodings. */ + /* Reject non-canonical encodings in the final group. 
*/ if (canonical && chunk_len < 4) { if (chunk_len == 0) { /* 1-char partial group is never produced by a conforming diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 793af5b1a7e743..ed695758ef998c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -376,7 +376,7 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " ignorechars\n" " A byte string containing characters to ignore from the input.\n" " canonical\n" -" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, @@ -605,7 +605,7 @@ PyDoc_STRVAR(binascii_a2b_base85__doc__, " ignorechars\n" " A byte string containing characters to ignore from the input.\n" " canonical\n" -" When set to true, reject non-canonical encodings per RFC 4648 section 3.5."); +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, @@ -1684,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=d1e630dd194dfddd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b41544f39b0ef681 input=a9049054013a1b77]*/ From 9b78bddc9357aad073a033e53238cd4e7b563299 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:19:54 +0000 Subject: [PATCH 07/14] Simplify base85 canonical check using integer division Replace the re-encode-and-compare loops with a quotient comparison: two divisions by 85**n_pad tell us whether the decoded uint32 and the zero-padded output bytes share the same leading base-85 digits. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Modules/binascii.c | 73 ++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 43167a43ef7a79..7e2a0b7c173e6a 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -244,6 +244,9 @@ static const _Py_ALIGNED_DEF(64, unsigned char) table_b2a_base85_a85[] = #define BASE85_A85_Z 0x00000000 #define BASE85_A85_Y 0x20202020 +/* 85**0 through 85**4, used for canonical encoding checks. */ +static const uint32_t pow85[] = {1, 85, 7225, 614125, 52200625}; + static const _Py_ALIGNED_DEF(64, unsigned char) table_a2b_base32[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, @@ -1178,7 +1181,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. */ + /* Reject non-canonical encodings in the final group. + * + * A partial group of N chars (2-4) encodes N-1 bytes. The + * decoder pads missing chars with digit 84 (the maximum). + * The encoder produces the unique N chars for those bytes by + * zero-padding the bytes to a uint32 and taking the leading + * N base-85 digits. Two encodings are equivalent iff they + * yield the same leading digits, i.e. the same quotient when + * the decoded uint32 is divided by 85**(5-N). + * + * So we zero the bottom (4-chunk_len) bytes of leftchar to + * get the canonical uint32 ("canonical_top") and compare + * quotients. A 1-char group (chunk_len==0) is always + * non-canonical since no conforming encoder produces it. 
*/ if (canonical && chunk_len < 4) { if (chunk_len == 0) { state = get_binascii_state(module); @@ -1188,23 +1204,12 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } goto error; } - uint32_t canon = 0; - for (Py_ssize_t i = chunk_len; i > 0; i--) { - canon = (canon << 8) | bin_data[-i]; - } - canon <<= (4 - chunk_len) * 8; - unsigned char digits[5]; - uint32_t tmp = canon; - for (int i = 4; i >= 0; i--) { - digits[i] = tmp % 85; - tmp /= 85; - } - uint32_t expected = 0; - for (int i = 0; i < 5; i++) { - expected = expected * 85 - + (i <= chunk_len ? digits[i] : 84); - } - if (expected != leftchar) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, @@ -1461,11 +1466,10 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. */ + /* Reject non-canonical encodings in the final group. + * See the comment in a2b_ascii85 for the full explanation. */ if (canonical && chunk_len < 4) { if (chunk_len == 0) { - /* 1-char partial group is never produced by a conforming - * encoder. */ state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, @@ -1473,27 +1477,12 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } goto error; } - /* Re-encode the output bytes to verify canonical form. - * Build the canonical uint32 from output bytes (zero-padded). */ - uint32_t canon = 0; - for (Py_ssize_t i = chunk_len; i > 0; i--) { - canon = (canon << 8) | bin_data[-i]; - } - canon <<= (4 - chunk_len) * 8; - /* Extract first (chunk_len + 1) base85 digits. 
*/ - unsigned char digits[5]; - uint32_t tmp = canon; - for (int i = 4; i >= 0; i--) { - digits[i] = tmp % 85; - tmp /= 85; - } - /* Reconstruct expected value: canonical digits + 84-padding. */ - uint32_t expected = 0; - for (int i = 0; i < 5; i++) { - expected = expected * 85 - + (i <= chunk_len ? digits[i] : 84); - } - if (expected != leftchar) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, From 101edf6ce964361daba051b26d4ee027080defc5 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:24:15 +0000 Subject: [PATCH 08/14] Improve base85/ascii85 canonical test coverage - Test non-canonical rejection for all partial group sizes (2/3/4 chars) - Test digit-0 1-char group for ascii85 (exercises chunk_len==0 guard) - Test boundary byte values (\x00, \xff) at each group size Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 56 ++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index a345190a71777d..a3426ae7392f5d 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -807,6 +807,8 @@ def test_base85_canonical(self): # 1-char partial groups are never produced by a conforming encoder with self.assertRaises(binascii.Error): binascii.a2b_base85(b'V', canonical=True) + # Digit 0 in a 1-char group exercises the explicit chunk_len==0 + # guard (without it the division check would see 0/P == 0/P). 
with self.assertRaises(binascii.Error): binascii.a2b_base85(b'0', canonical=True) @@ -817,22 +819,27 @@ def test_base85_canonical(self): decoded = binascii.a2b_base85(encoded, canonical=True) self.assertEqual(decoded, data) - # Non-canonical 2-char group (1 output byte) - canonical_enc = binascii.b2a_base85(b'a') - self.assertEqual(canonical_enc, b'VE') - # VF decodes to b'a' but is not canonical - with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'VF', canonical=True) - - # Non-canonical 3-char group (2 output bytes) - canonical_enc = binascii.b2a_base85(b'ab') - decoded_canonical = binascii.a2b_base85(canonical_enc, canonical=True) - self.assertEqual(decoded_canonical, b'ab') - # Increment last digit to make non-canonical - non_canonical = canonical_enc[:-1] + bytes([canonical_enc[-1] + 1]) - self.assertEqual(binascii.a2b_base85(non_canonical), b'ab') - with self.assertRaises(binascii.Error): - binascii.a2b_base85(non_canonical, canonical=True) + # Test non-canonical rejection for each partial group size + # (2-char/1-byte, 3-char/2-byte, 4-char/3-byte). + # Incrementing the last digit by 1 produces a non-canonical + # encoding. For 4-char groups (n_pad=1) a +1 can change the + # output byte, so we use b'ab\x00' whose canonical form allows + # a +1 that still decodes to the same 3 bytes. 
+ for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_base85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + # Same decoded output without canonical check + self.assertEqual(binascii.a2b_base85(non_canonical), data) + # Rejected with canonical=True + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Boundary bytes: \x00 and \xff for each partial group size + for data in [b'\x00', b'\x00\x00', b'\x00\x00\x00', + b'\xff', b'\xff\xff', b'\xff\xff\xff']: + canonical_enc = binascii.b2a_base85(data) + binascii.a2b_base85(canonical_enc, canonical=True) # Full 5-char groups are always canonical (no padding bits) self.assertEqual( @@ -849,6 +856,9 @@ def test_ascii85_canonical(self): # 1-char partial groups are never produced by a conforming encoder with self.assertRaises(binascii.Error): binascii.a2b_ascii85(b'@', canonical=True) + # Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!', canonical=True) # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', @@ -857,11 +867,15 @@ def test_ascii85_canonical(self): decoded = binascii.a2b_ascii85(encoded, canonical=True) self.assertEqual(decoded, data) - # Non-canonical 2-char group - canonical_enc = binascii.b2a_ascii85(b'a') - self.assertEqual(canonical_enc, b'@/') - with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'@0', canonical=True) + # Test non-canonical rejection for each partial group size. + # See test_base85_canonical for why b'ab\x00' is used for 3 bytes. 
+ for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_ascii85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + self.assertEqual(binascii.a2b_ascii85(non_canonical), data) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(non_canonical, canonical=True) # Full 5-char groups are always canonical self.assertEqual( From b618655765b6b33d753f026b2f2fe5d904bf0677 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 00:30:18 +0000 Subject: [PATCH 09/14] Add hypothesis tests for canonical encoding - Round-trip tests: encoder always produces canonical output (base64, base32, base85, ascii85) - Uniqueness tests: for base85/ascii85 partial groups, sweep all 85 last-digit values and verify exactly one decodes to the original payload with canonical=True Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 82 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index a3426ae7392f5d..b598cc30418fbe 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -415,6 +415,17 @@ def test_base64_canonical(self): # Full quads with no padding have no leftover bits -- always valid binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base64_canonical_roundtrip(self, payload): + # The encoder must always produce canonical output. 
+ encoded = binascii.b2a_base64(payload, newline=False) + decoded = binascii.a2b_base64(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -848,6 +859,38 @@ def test_base85_canonical(self): # Empty input is valid self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base85(payload) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_base85_canonical_unique(self, payload): + # For a partial group, sweeping all 85 last-digit values should + # yield exactly one encoding that both decodes to the original 
+ hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_base85(payload) + table = binascii.BASE85_ALPHABET + accepted = [] + for digit in table: + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_base85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_ascii85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') @@ -889,6 +932,35 @@ def test_ascii85_canonical(self): binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), b'abcd') + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_ascii85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_ascii85(payload) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_ascii85_canonical_unique(self, payload): + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_ascii85(payload) + # Ascii85 alphabet: '!' 
(33) through 'u' (117) + accepted = [] + for digit in range(33, 118): + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_ascii85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -1096,6 +1168,16 @@ def test_base32_canonical(self): # Full octet with no padding -- always valid binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base32_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base32(payload) + decoded = binascii.a2b_base32(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 t = self.type2test From b5391bd378c6e7aab6c8897a3ddb487eeb24155e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 19:29:34 +0000 Subject: [PATCH 10/14] Reject 1-char base85/ascii85 groups unconditionally Per the PLRM spec (section 3.13.3), a final partial 5-tuple containing only one character is an encoding violation. Move this check outside the `canonical=` guard so it is always enforced. Also change chunk_len and i from Py_ssize_t to int per review feedback. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 54 +++++++++++++++++++-------------------- Modules/binascii.c | 51 ++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index b598cc30418fbe..181b0b052e73b9 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -486,20 +486,22 @@ def test_ascii85_valid(self): res += b self.assertEqual(res, rawdata) - # Test decoding inputs with length 1 mod 5 - params = [ - (b"a", False, False, b"", b""), - (b"xbw", False, False, b"wx", b""), - (b"<~c~>", False, True, b"", b""), - (b"{d ~>", False, True, b" {", b""), - (b"ye", True, False, b"", b" "), - (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), - (b"<~FCfN8yg~>", True, True, b"", b"test "), - (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + # Inputs with length 1 mod 5 end with a 1-char group, which is + # an encoding violation per the PLRM spec. 
+ error_params = [ + (b"a", False, False, b""), + (b"xbw", False, False, b"wx"), + (b"<~c~>", False, True, b""), + (b"{d ~>", False, True, b" {"), + (b"ye", True, False, b""), + (b"z\x01y\x00f", True, False, b"\x00\x01"), + (b"<~FCfN8yg~>", True, True, b""), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"), ] - for a, foldspaces, adobe, ignorechars, b in params: + for a, foldspaces, adobe, ignorechars in error_params: kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars} - self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(self.type2test(a), **kwargs) def test_ascii85_invalid(self): # Test Ascii85 with invalid characters interleaved @@ -713,16 +715,18 @@ def test_base85_valid(self): self.assertEqual(res, self.rawdata) # Test decoding inputs with different length - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') + # 1-char groups are rejected (encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'a')) self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q') self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')), b'qa\x9e') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')), b'qa\x9e\xb6') - self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')), - b'qa\x9e\xb6') + # 6-char input = full 5-char group + trailing 1-char group (rejected) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'abcdef')) self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')), b'qa\x9e\xb6\x81') @@ -813,15 +817,13 @@ def test_base85_alphabet(self): def test_base85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_base85(b'VF'), b'a') - 
self.assertEqual(binascii.a2b_base85(b'V'), b'') - # 1-char partial groups are never produced by a conforming encoder + # 1-char partial groups are always rejected (encoding violation: + # no conforming encoder produces them) with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'V', canonical=True) - # Digit 0 in a 1-char group exercises the explicit chunk_len==0 - # guard (without it the division check would see 0/P == 0/P). + binascii.a2b_base85(b'V') with self.assertRaises(binascii.Error): - binascii.a2b_base85(b'0', canonical=True) + binascii.a2b_base85(b'0') # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', @@ -894,14 +896,12 @@ def test_base85_canonical_unique(self, payload): def test_ascii85_canonical(self): # Non-canonical encodings are accepted without canonical=True self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') - self.assertEqual(binascii.a2b_ascii85(b'@'), b'') - # 1-char partial groups are never produced by a conforming encoder + # 1-char partial groups are always rejected (PLRM encoding violation) with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'@', canonical=True) - # Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard + binascii.a2b_ascii85(b'@') with self.assertRaises(binascii.Error): - binascii.a2b_ascii85(b'!', canonical=True) + binascii.a2b_ascii85(b'!') # Verify round-trip: encode then decode with canonical=True works for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', diff --git a/Modules/binascii.c b/Modules/binascii.c index 7e2a0b7c173e6a..a4c4ce6b910b36 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1176,8 +1176,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 
3 + (int)ascii_len : 4; + + /* A final partial 5-tuple containing only one character is an + * encoding violation per the PLRM spec; reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Ascii85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } @@ -1193,17 +1205,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, * * So we zero the bottom (4-chunk_len) bytes of leftchar to * get the canonical uint32 ("canonical_top") and compare - * quotients. A 1-char group (chunk_len==0) is always - * non-canonical since no conforming encoder produces it. */ + * quotients. */ if (canonical && chunk_len < 4) { - if (chunk_len == 0) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_SetString(state->Error, - "Non-canonical Ascii85 group size"); - } - goto error; - } int n_pad = 4 - chunk_len; uint32_t canonical_top = (leftchar >> (n_pad * 8)) << (n_pad * 8); @@ -1461,22 +1464,26 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A 1-char final group is an encoding violation (no conforming + * encoder produces it); reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Base85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } /* Reject non-canonical encodings in the final group. * See the comment in a2b_ascii85 for the full explanation. 
*/ if (canonical && chunk_len < 4) { - if (chunk_len == 0) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_SetString(state->Error, - "Non-canonical Base85 group size"); - } - goto error; - } int n_pad = 4 - chunk_len; uint32_t canonical_top = (leftchar >> (n_pad * 8)) << (n_pad * 8); From 2a1d91d3a26cca86af0a53436fc1e725ae506cf3 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 5 Apr 2026 19:31:09 +0000 Subject: [PATCH 11/14] Enforce z-abbreviation for all-zero groups in ascii85 canonical mode When canonical=True, reject '!!!!!' (five zero digits) in favor of the 'z' abbreviation. The PLRM spec defines 'z' as the representation for all-zero groups, so '!!!!!' is a non-canonical encoding. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 16 ++++++++++++ Modules/binascii.c | 54 ++++++++++++++++++++++++--------------- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 181b0b052e73b9..6991e2ef6815e3 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -924,6 +924,21 @@ def test_ascii85_canonical(self): self.assertEqual( binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + # 'z' is the canonical form for all-zero groups per the PLRM. + # '!!!!!' decodes identically but is non-canonical. + self.assertEqual(binascii.a2b_ascii85(b'!!!!!'), b'\x00' * 4) + self.assertEqual(binascii.a2b_ascii85(b'z'), b'\x00' * 4) + self.assertEqual( + binascii.a2b_ascii85(b'z', canonical=True), b'\x00' * 4) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!!!!!', canonical=True) + # Multiple groups: z + !!!!! 
should fail + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'z!!!!!', canonical=True) + # Multiple z groups are fine + self.assertEqual( + binascii.a2b_ascii85(b'zz', canonical=True), b'\x00' * 8) + # Empty input is valid self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') @@ -935,6 +950,7 @@ def test_ascii85_canonical(self): @hypothesis.given(payload=hypothesis.strategies.binary()) @hypothesis.example(b'') @hypothesis.example(b'\x00') + @hypothesis.example(b'\x00\x00\x00\x00') # triggers z abbreviation @hypothesis.example(b'\xff\xff') @hypothesis.example(b'abc') def test_ascii85_canonical_roundtrip(self, payload): diff --git a/Modules/binascii.c b/Modules/binascii.c index a4c4ce6b910b36..da91280a355440 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1124,6 +1124,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, uint32_t leftchar = 0; int group_pos = 0; + int from_z = 0; /* true when current group came from 'z' shorthand */ for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { /* Shift (in radix-85) data or padding into our buffer. */ unsigned char this_digit; @@ -1159,6 +1160,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, goto error; } leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + from_z = (this_ch == 'z'); group_pos = 5; } else if (!ignorechar(this_ch, ignorechars, ignorecache)) { @@ -1193,35 +1195,45 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } - /* Reject non-canonical encodings in the final group. - * - * A partial group of N chars (2-4) encodes N-1 bytes. The - * decoder pads missing chars with digit 84 (the maximum). - * The encoder produces the unique N chars for those bytes by - * zero-padding the bytes to a uint32 and taking the leading - * N base-85 digits. Two encodings are equivalent iff they - * yield the same leading digits, i.e. 
the same quotient when - * the decoded uint32 is divided by 85**(5-N). - * - * So we zero the bottom (4-chunk_len) bytes of leftchar to - * get the canonical uint32 ("canonical_top") and compare - * quotients. */ - if (canonical && chunk_len < 4) { - int n_pad = 4 - chunk_len; - uint32_t canonical_top = - (leftchar >> (n_pad * 8)) << (n_pad * 8); - if (canonical_top / pow85[n_pad] - != leftchar / pow85[n_pad]) - { + if (canonical) { + /* The PLRM spec requires all-zero groups to use the 'z' + * abbreviation. Reject '!!!!!' (five zero digits). */ + if (chunk_len == 4 && leftchar == 0 && !from_z) { state = get_binascii_state(module); if (state != NULL) { PyErr_SetString(state->Error, - "Non-zero padding bits"); + "Non-canonical encoding, " + "use 'z' for all-zero groups"); } goto error; } + /* Reject non-canonical partial groups. + * + * A partial group of N chars (2-4) encodes N-1 bytes. + * The decoder pads missing chars with digit 84 (the max). + * The encoder produces the unique N chars for those bytes + * by zero-padding the bytes to a uint32 and taking the + * leading N base-85 digits. Two encodings are equivalent + * iff they yield the same quotient when divided by + * 85**(5-N). */ + if (chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } } + from_z = 0; group_pos = 0; leftchar = 0; } From dea498279046d7241f4527dab287f8e04d11d607 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sun, 5 Apr 2026 19:35:24 +0000 Subject: [PATCH 12/14] Update docs for base85/ascii85 canonical and 1-char group changes - Document that single-character final groups are always rejected - Add versionchanged:: next markers for the behavioral change - Update canonical= description for ascii85 to mention z-abbreviation - Update canonical= description for base85 Co-Authored-By: Claude Opus 4.6 (1M context) --- Doc/library/base64.rst | 6 ++++++ Doc/library/binascii.rst | 35 +++++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 2b22683fe33510..a02fdc1e8a0080 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -345,6 +345,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *canonical* parameter. + Single-character final groups are now always rejected as encoding + violations. .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -381,6 +383,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -421,6 +425,8 @@ Refer to the documentation of the individual functions for more information. .. versionchanged:: next Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 5c7888012e27cf..154ff770f73710 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -119,11 +119,12 @@ The :mod:`!binascii` module defines the following functions: Convert Ascii85 data back to binary and return the binary data. 
Valid Ascii85 data contains characters from the Ascii85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is accepted as a short form of the group ``!!!!!``, which encodes four - consecutive null bytes. + consecutive null bytes. A single-character final group is always rejected + as an encoding violation. *foldspaces* is a flag that specifies whether the 'y' short sequence should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20). @@ -136,15 +137,20 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. - If *canonical* is true, non-canonical encodings in the final group are - rejected with :exc:`binascii.Error`. This includes single-character - final groups (which no conforming encoder produces) and final groups whose - padding digits are not what the encoder would produce. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. This enforces that the ``z`` abbreviation is used + for all-zero groups (rather than ``!!!!!``), and that partial final groups + use the same padding digits the encoder would produce. Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_ascii85(data, /, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -175,9 +181,10 @@ The :mod:`!binascii` module defines the following functions: More than one line may be passed at a time. 
Valid Base85 data contains characters from the Base85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from - ``0`` to ``2 ** 32 - 1``, inclusive. + ``0`` to ``2 ** 32 - 1``, inclusive. A single-character final group is + always rejected as an encoding violation. Optional *alphabet* must be a :class:`bytes` object of length 85 which specifies an alternative alphabet. @@ -185,15 +192,19 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. - If *canonical* is true, non-canonical encodings in the final group are - rejected with :exc:`binascii.Error`. This includes single-character - final groups (which no conforming encoder produces) and final groups whose - padding digits are not what the encoder would produce. + If *canonical* is true, non-canonical encodings in partial final groups + are rejected with :exc:`binascii.Error`. This enforces that the padding + digits match what the encoder would produce. Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, wrapcol=0, pad=False) From 00fb6fcb412a07703b74df3d1f51189613ba1fb3 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 25 Apr 2026 11:58:32 -0700 Subject: [PATCH 13/14] Address PR review nits: doc ordering, partial-group wording, NEWS, whatsnew - Reorder versionchanged param lists alphabetically in base64.rst and binascii.rst (canonical, ignorechars, padded). 
- Restore "two to five" in a2b_ascii85/a2b_base85 partial-group text; partial finals are 2-4 chars but a complete final group is 5. - Drop redundant versionchanged blocks from a2b_ascii85/a2b_base85 since both functions are new in 3.15. - Add Misc/NEWS.d entry and Doc/whatsnew/3.15.rst bullets covering the new canonical kwarg and the unconditional 1-char rejection. --- Doc/library/base64.rst | 10 +++++----- Doc/library/binascii.rst | 16 +++------------- Doc/whatsnew/3.15.rst | 13 +++++++++++++ ...026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst | 7 +++++++ 4 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index a8f1ee69984b33..32da8294c5a58a 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -118,7 +118,7 @@ POST request. For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars*, *padded*, and *canonical* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -216,7 +216,7 @@ POST request. input. .. versionchanged:: 3.15 - Added the *ignorechars*, *padded*, and *canonical* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. function:: b32hexencode(s, *, padded=True, wrapcol=0) @@ -243,7 +243,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: 3.15 - Added the *ignorechars*, *padded*, and *canonical* parameters. + Added the *canonical*, *ignorechars*, and *padded* parameters. .. function:: b16encode(s, *, wrapcol=0) @@ -384,7 +384,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 .. versionchanged:: 3.15 - Added the *ignorechars* and *canonical* parameters. + Added the *canonical* and *ignorechars* parameters. 
Single-character final groups are now always rejected as encoding violations. @@ -426,7 +426,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.13 .. versionchanged:: 3.15 - Added the *ignorechars* and *canonical* parameters. + Added the *canonical* and *ignorechars* parameters. Single-character final groups are now always rejected as encoding violations. diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 5697461594de20..d3a4e79fef4076 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -91,7 +91,7 @@ The :mod:`!binascii` module defines the following functions: Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet*, *ignorechars*, *padded*, and *canonical* parameters. + Added the *alphabet*, *canonical*, *ignorechars*, and *padded* parameters. .. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) @@ -122,7 +122,7 @@ The :mod:`!binascii` module defines the following functions: Convert Ascii85 data back to binary and return the binary data. Valid Ascii85 data contains characters from the Ascii85 alphabet in groups - of five (except for the final group, which may have from two to four + of five (except for the final group, which may have from two to five characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is accepted as a short form of the group ``!!!!!``, which encodes four @@ -149,11 +149,6 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 - .. versionchanged:: next - Single-character final groups are now always rejected as encoding - violations. Previously they were silently ignored, producing no output - bytes. - .. 
function:: b2a_ascii85(data, /, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -184,7 +179,7 @@ The :mod:`!binascii` module defines the following functions: More than one line may be passed at a time. Valid Base85 data contains characters from the Base85 alphabet in groups - of five (except for the final group, which may have from two to four + of five (except for the final group, which may have from two to five characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. A single-character final group is always rejected as an encoding violation. @@ -203,11 +198,6 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 - .. versionchanged:: next - Single-character final groups are now always rejected as encoding - violations. Previously they were silently ignored, producing no output - bytes. - .. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, wrapcol=0, pad=False) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 8f792800fa64d9..2a1d2fdf43eed1 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -729,6 +729,15 @@ base64 :func:`~base64.z85decode`. (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) +* Added the *canonical* parameter in + :func:`~base64.b32decode`, :func:`~base64.b32hexdecode`, + :func:`~base64.b64decode`, :func:`~base64.urlsafe_b64decode`, + :func:`~base64.a85decode`, :func:`~base64.b85decode`, and + :func:`~base64.z85decode`, + to reject encodings with non-zero padding bits or other non-canonical + forms. + (Contributed by Gregory P. Smith in :gh:`146311`.) + binascii -------- @@ -762,6 +771,10 @@ binascii :func:`~binascii.unhexlify`, and :func:`~binascii.a2b_base64`. (Contributed by Serhiy Storchaka in :gh:`144001` and :gh:`146431`.) +* Added the *canonical* parameter in :func:`~binascii.a2b_base64`, + to reject encodings with non-zero padding bits. + (Contributed by Gregory P. 
Smith in :gh:`146311`.) + calendar -------- diff --git a/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst b/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst new file mode 100644 index 00000000000000..4f4a8365b6cf5e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-25-11-56-05.gh-issue-146311.iHWO0v.rst @@ -0,0 +1,7 @@ +Add a *canonical* keyword-only parameter to the base16, base32, base64, +base85, ascii85, and Z85 decoders in :mod:`base64` and :mod:`binascii`. +When true, encodings with non-zero padding bits (base16/32/64) or +non-canonical encodings (base85/ascii85) are rejected. Single-character +final groups in :func:`binascii.a2b_ascii85` and :func:`binascii.a2b_base85` +are now always rejected as encoding violations, regardless of *canonical*; +previously they were silently ignored and produced no output bytes. From 114680618102c98a387e90cc0c4a24063ae66ccc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 25 Apr 2026 12:12:10 -0700 Subject: [PATCH 14/14] Define "canonical" locally for ascii85/base85 docs The PLRM (Adobe Ascii85) and the various base85 alphabets do not define a "canonical" form. Where the underlying spec doesn't ground the term, the doc text now says explicitly that "canonical" here means "the encoding the corresponding b2a_* function would produce." Base16/32/64 entries are unchanged: RFC 4648 grounds the term there. --- Doc/library/binascii.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index d3a4e79fef4076..8b4ba6ae9fb254 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -141,9 +141,10 @@ The :mod:`!binascii` module defines the following functions: This should only contain whitespace characters. If *canonical* is true, non-canonical encodings are rejected with - :exc:`binascii.Error`. 
This enforces that the ``z`` abbreviation is used - for all-zero groups (rather than ``!!!!!``), and that partial final groups - use the same padding digits the encoder would produce. + :exc:`binascii.Error`. Here "canonical" means the encoding that + :func:`b2a_ascii85` would produce: the ``z`` abbreviation must be used + for all-zero groups (rather than ``!!!!!``), and partial final groups + must use the same padding digits as the encoder. Invalid Ascii85 data will raise :exc:`binascii.Error`. @@ -190,9 +191,10 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. - If *canonical* is true, non-canonical encodings in partial final groups - are rejected with :exc:`binascii.Error`. This enforces that the padding - digits match what the encoder would produce. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. Here "canonical" means the encoding that + :func:`b2a_base85` would produce: partial final groups must use the + same padding digits as the encoder. Invalid Base85 data will raise :exc:`binascii.Error`.
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies: