Declutterfier! Saves Data!

--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/pull/138620.patch

ding whitespace. - [line_start, leading_whitespace_end) is the leading - whitespace of this line, + if (smallest == NULL || PyObject_RichCompareBool(line, smallest, Py_LT)) { + smallest = line; + } + if (largest == NULL || PyObject_RichCompareBool(line, largest, Py_GT)) { + largest = line; + } + } - [_start, _start + _len) is the leading whitespace of the - current longest leading whitespace. */ - Py_ssize_t new_len = 0; - const char *_iter = _start, *line_iter = line_start; + if (smallest == NULL || largest == NULL) { + return 0; + } - while (_iter < _start + _len && line_iter < leading_whitespace_end - && *_iter == *line_iter) - { - ++_iter; - ++line_iter; - ++new_len; - } + Py_ssize_t margin = 0; + Py_ssize_t minlen = Py_MIN(PyUnicode_GET_LENGTH(smallest), + PyUnicode_GET_LENGTH(largest)); + int skind = PyUnicode_KIND(smallest); + int lkind = PyUnicode_KIND(largest); + const void *sdata = PyUnicode_DATA(smallest); + const void *ldata = PyUnicode_DATA(largest); - _len = new_len; - if (_len == 0) { - // No common things now, fast exit! - return 0; - } + while (margin < minlen) { + Py_UCS4 c1 = PyUnicode_READ(skind, sdata, margin); + Py_UCS4 c2 = PyUnicode_READ(lkind, ldata, margin); + if (c1 != c2 || !(c1 == ' ' || c1 == '\t')) { + break; } + margin++; } - assert(_len >= 0); - if (_len > 0) { - *output = _start; - } - return _len; + return margin; } /* Dedent a string. @@ -14395,74 +14377,58 @@ search_longest_common_leading_whitespace( PyObject * _PyUnicode_Dedent(PyObject *unicode) { - Py_ssize_t src_len = 0; - const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len); - if (!src) { + PyObject *sep = PyUnicode_FromString("\n"); + if (sep == NULL) { return NULL; } - assert(src_len >= 0); - if (src_len == 0) { - return Py_NewRef(unicode); - } - - const char *const end = src + src_len; - - // [whitespace_start, whitespace_start + whitespace_len) - // describes the current longest common leading whitespace - const char *whitespace_start = NULL; - Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( - src, end, &whitespace_start); - - if (whitespace_len == 0) { - return Py_NewRef(unicode); + PyObject *lines = PyUnicode_Split(unicode, sep, -1); + Py_DECREF(sep); + if (lines == NULL) { + return NULL; } + Py_ssize_t nlines = PyList_GET_SIZE(lines); + Py_ssize_t margin = search_longest_common_leading_whitespace(lines, nlines); - // now we should trigger a dedent - char *dest = PyMem_Malloc(src_len); - if (!dest) { - PyErr_NoMemory(); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + Py_DECREF(lines); return NULL; } - char *dest_iter = dest; - for (const char *iter = src; iter < end; ++iter) { - const char *line_start = iter; - bool in_leading_space = true; + for (Py_ssize_t i = 0; i < nlines; i++) { + PyObject *line = PyList_GET_ITEM(lines, i); + Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); - // iterate over a line to find the end of a line - while (iter < end && *iter != '\n') { - if (in_leading_space && *iter != ' ' && *iter != '\t') { - in_leading_space = false; + int all_ws = 1; + int kind = PyUnicode_KIND(line); + void *data = PyUnicode_DATA(line); + for (Py_ssize_t j = 0; j < linelen; j++) { + if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { + all_ws = 0; + break; } - ++iter; } - // invariant: *iter == '\n' or iter == end - bool append_newline = iter < end; - - // if this line has all white space, write '\n' and continue - if (in_leading_space && append_newline) { - *dest_iter++ = '\n'; - continue; + if (!all_ws) { + Py_ssize_t start = Py_MIN(margin, linelen); + if (PyUnicodeWriter_WriteSubstring(writer, line, start, linelen) < 0) { + PyUnicodeWriter_Discard(writer); + Py_DECREF(lines); + return NULL; + } } - /* copy [new_line_start + whitespace_len, iter) to buffer, then - conditionally append '\n' */ - - Py_ssize_t new_line_len = iter - line_start - whitespace_len; - assert(new_line_len >= 0); - memcpy(dest_iter, line_start + whitespace_len, new_line_len); - - dest_iter += new_line_len; - - if (append_newline) { - *dest_iter++ = '\n'; + if (i < nlines - 1) { + if (PyUnicodeWriter_WriteChar(writer, '\n') < 0) { + PyUnicodeWriter_Discard(writer); + Py_DECREF(lines); + return NULL; + } } } - PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest); - PyMem_Free(dest); - return res; + Py_DECREF(lines); + return PyUnicodeWriter_Finish(writer); } static PyMethodDef unicode_methods[] = { From f6ace9d2b62352adb63f6fa9a452d13bb6b02089 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:06:00 +0100 Subject: [PATCH 2/5] Add comment & un-refactor --- Include/internal/pycore_unicodeobject.h | 4 +- Objects/unicodeobject.c | 193 ++++++++++++++---------- 2 files changed, 114 insertions(+), 83 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 57293171aca41b..c53bc084de072e 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -259,7 +259,9 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( /* Dedent a string. Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Return a new reference on success, NULL with an exception set on error. + + Export for test_capi.test_unicode */ PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ee51cdcaa3d4cf..67898b56711c61 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14309,126 +14309,155 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) } /* - Find the longest common leading whitespace among a list of lines. - Whitespace-only lines are ignored. - Returns the margin length (>= 0). +This function searches the longest common leading whitespace +of all lines in the [src, end). +It returns the length of the common leading whitespace and sets *output* to +point to the beginning of the common leading whitespace if length > 0. */ static Py_ssize_t -search_longest_common_leading_whitespace(PyObject *lines, Py_ssize_t nlines) -{ - PyObject *smallest = NULL, *largest = NULL; - for (Py_ssize_t i = 0; i < nlines; i++) { - PyObject *line = PyList_GET_ITEM(lines, i); - Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); - - if (linelen == 0) { - continue; - } - - int kind = PyUnicode_KIND(line); - void *data = PyUnicode_DATA(line); - int all_ws = 1; - for (Py_ssize_t j = 0; j < linelen; j++) { - if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { - all_ws = 0; - break; +search_longest_common_leading_whitespace( + const char *const src, + const char *const end, + const char **output) +{ + // [_start, _start + _len) + // describes the current longest common leading whitespace + const char *_start = NULL; + Py_ssize_t _len = 0; + + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + const char *leading_whitespace_end = NULL; + + // scan the whole line + while (iter < end && *iter != '\n') { + if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) { + if (iter == line_start) { + // some line has no indent, fast exit! + return 0; + } + leading_whitespace_end = iter; } + ++iter; } - if (all_ws) { + + // if this line has all white space, skip it + if (!leading_whitespace_end) { continue; } - if (smallest == NULL || PyObject_RichCompareBool(line, smallest, Py_LT)) { - smallest = line; + if (!_start) { + // update the first leading whitespace + _start = line_start; + _len = leading_whitespace_end - line_start; + assert(_len > 0); } - if (largest == NULL || PyObject_RichCompareBool(line, largest, Py_GT)) { - largest = line; - } - } + else { + /* We then compare with the current longest leading whitespace. - if (smallest == NULL || largest == NULL) { - return 0; - } + [line_start, leading_whitespace_end) is the leading + whitespace of this line, - Py_ssize_t margin = 0; - Py_ssize_t minlen = Py_MIN(PyUnicode_GET_LENGTH(smallest), - PyUnicode_GET_LENGTH(largest)); - int skind = PyUnicode_KIND(smallest); - int lkind = PyUnicode_KIND(largest); - const void *sdata = PyUnicode_DATA(smallest); - const void *ldata = PyUnicode_DATA(largest); + [_start, _start + _len) is the leading whitespace of the + current longest leading whitespace. */ + Py_ssize_t new_len = 0; + const char *_iter = _start, *line_iter = line_start; - while (margin < minlen) { - Py_UCS4 c1 = PyUnicode_READ(skind, sdata, margin); - Py_UCS4 c2 = PyUnicode_READ(lkind, ldata, margin); - if (c1 != c2 || !(c1 == ' ' || c1 == '\t')) { - break; + while (_iter < _start + _len && line_iter < leading_whitespace_end + && *_iter == *line_iter) + { + ++_iter; + ++line_iter; + ++new_len; + } + + _len = new_len; + if (_len == 0) { + // No common things now, fast exit! + return 0; + } } - margin++; } - return margin; + assert(_len >= 0); + if (_len > 0) { + *output = _start; + } + return _len; } /* Dedent a string. - Behaviour is expected to be an exact match of `textwrap.dedent`. - Return a new reference on success, NULL with exception set on error. + Behaviour is expected to be an exact match of textwrap.dedent. + Return a new reference on success, NULL with an exception set on error. */ PyObject * _PyUnicode_Dedent(PyObject *unicode) { - PyObject *sep = PyUnicode_FromString("\n"); - if (sep == NULL) { + Py_ssize_t src_len = 0; + const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len); + if (!src) { return NULL; } - PyObject *lines = PyUnicode_Split(unicode, sep, -1); - Py_DECREF(sep); - if (lines == NULL) { - return NULL; + assert(src_len >= 0); + if (src_len == 0) { + return Py_NewRef(unicode); } - Py_ssize_t nlines = PyList_GET_SIZE(lines); - Py_ssize_t margin = search_longest_common_leading_whitespace(lines, nlines); - PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); - if (writer == NULL) { - Py_DECREF(lines); + const char *const end = src + src_len; + + // [whitespace_start, whitespace_start + whitespace_len) + // describes the current longest common leading whitespace + const char *whitespace_start = NULL; + const Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( + src, end, &whitespace_start); + + // now we should trigger a dedent + char *dest = PyMem_Malloc(src_len); + if (!dest) { + PyErr_NoMemory(); return NULL; } + char *dest_iter = dest; - for (Py_ssize_t i = 0; i < nlines; i++) { - PyObject *line = PyList_GET_ITEM(lines, i); - Py_ssize_t linelen = PyUnicode_GET_LENGTH(line); + for (const char *iter = src; iter < end; ++iter) { + const char *line_start = iter; + bool in_leading_space = true; - int all_ws = 1; - int kind = PyUnicode_KIND(line); - void *data = PyUnicode_DATA(line); - for (Py_ssize_t j = 0; j < linelen; j++) { - if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, j))) { - all_ws = 0; - break; + // iterate over a line to find the end of a line + while (iter < end && *iter != '\n') { + if (in_leading_space && !Py_ISSPACE(Py_CHARMASK(*iter))) { + in_leading_space = false; } + ++iter; } - if (!all_ws) { - Py_ssize_t start = Py_MIN(margin, linelen); - if (PyUnicodeWriter_WriteSubstring(writer, line, start, linelen) < 0) { - PyUnicodeWriter_Discard(writer); - Py_DECREF(lines); - return NULL; + // invariant: *iter == '\n' or iter == end + const bool append_newline = iter < end; + + // if this line has all white space, write '\n' and continue + if (in_leading_space) { + if (append_newline) { + *dest_iter++ = '\n'; } + continue; } - if (i < nlines - 1) { - if (PyUnicodeWriter_WriteChar(writer, '\n') < 0) { - PyUnicodeWriter_Discard(writer); - Py_DECREF(lines); - return NULL; - } + /* copy [new_line_start + whitespace_len, iter) to buffer, then + conditionally append '\n' */ + const Py_ssize_t new_line_len = iter - line_start - whitespace_len; + assert(new_line_len >= 0); + memcpy(dest_iter, line_start + whitespace_len, new_line_len); + + dest_iter += new_line_len; + + if (append_newline) { + *dest_iter++ = '\n'; } } - Py_DECREF(lines); - return PyUnicodeWriter_Finish(writer); + PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest); + PyMem_Free(dest); + return res; } static PyMethodDef unicode_methods[] = { From 3d3d957ca45772a2224f9fb940b83651e7981d9a Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:13:33 +0100 Subject: [PATCH 3/5] Revert some more --- Objects/unicodeobject.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 67898b56711c61..2feac651a8fcdf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14332,6 +14332,8 @@ search_longest_common_leading_whitespace( // scan the whole line while (iter < end && *iter != '\n') { if (!leading_whitespace_end && !Py_ISSPACE(Py_CHARMASK(*iter))) { + /* `iter` points to the first non-whitespace character + in this line */ if (iter == line_start) { // some line has no indent, fast exit! return 0; @@ -14408,7 +14410,7 @@ _PyUnicode_Dedent(PyObject *unicode) // [whitespace_start, whitespace_start + whitespace_len) // describes the current longest common leading whitespace const char *whitespace_start = NULL; - const Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( + Py_ssize_t whitespace_len = search_longest_common_leading_whitespace( src, end, &whitespace_start); // now we should trigger a dedent @@ -14432,7 +14434,7 @@ _PyUnicode_Dedent(PyObject *unicode) } // invariant: *iter == '\n' or iter == end - const bool append_newline = iter < end; + bool append_newline = iter < end; // if this line has all white space, write '\n' and continue if (in_leading_space) { @@ -14444,7 +14446,7 @@ _PyUnicode_Dedent(PyObject *unicode) /* copy [new_line_start + whitespace_len, iter) to buffer, then conditionally append '\n' */ - const Py_ssize_t new_line_len = iter - line_start - whitespace_len; + Py_ssize_t new_line_len = iter - line_start - whitespace_len; assert(new_line_len >= 0); memcpy(dest_iter, line_start + whitespace_len, new_line_len); From 4403936d7f278cd7dc48da0b23b93a45448ef696 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 16:22:30 +0100 Subject: [PATCH 4/5] Revert odd line removal --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2feac651a8fcdf..9e9a0d826c46bb 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14446,6 +14446,7 @@ _PyUnicode_Dedent(PyObject *unicode) /* copy [new_line_start + whitespace_len, iter) to buffer, then conditionally append '\n' */ + Py_ssize_t new_line_len = iter - line_start - whitespace_len; assert(new_line_len >= 0); memcpy(dest_iter, line_start + whitespace_len, new_line_len); From 0cf91f242a96ad7c5fb042a9b930440092cd69a8 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 7 Sep 2025 17:00:54 +0100 Subject: [PATCH 5/5] Changes --- Include/internal/pycore_unicodeobject.h | 2 +- Lib/test/test_capi/test_unicode.py | 1 - Modules/_testinternalcapi.c | 3 ++- Objects/unicodeobject.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c53bc084de072e..68ac71b747eedc 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -261,7 +261,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Behaviour is expected to be an exact match of `textwrap.dedent`. Return a new reference on success, NULL with an exception set on error. - Export for test_capi.test_unicode + Export for '_testinternalcapi' shared extension. */ PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode); diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index 5ddd26bc465852..f18377927cdbc2 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -1162,7 +1162,6 @@ def foo(): ''' self.assertEqual(expect, dedent(text)) - @support.cpython_only @unittest.skipIf(_testlimitedcapi is None, 'need _testlimitedcapi module') def test_concat(self): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 0776b088b4a1f2..af67a4d2b488ba 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -34,7 +34,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pylifecycle.h" // _PyInterpreterConfig_InitFromDict() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII() / _PyUnicode_Dedent() +#include "pycore_unicodeobject.h" // _PyUnicode_TransformDecimalAndSpaceToASCII(), _PyUnicode_Dedent() #include "clinic/_testinternalcapi.c.h" @@ -1416,6 +1416,7 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } + /* Test _PyUnicode_Dedent() */ static PyObject * unicode_dedent(PyObject *self, PyObject *arg) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9e9a0d826c46bb..b767b964d68822 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14311,7 +14311,7 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) /* This function searches the longest common leading whitespace of all lines in the [src, end). -It returns the length of the common leading whitespace and sets *output* to +It returns the length of the common leading whitespace and sets `output` to point to the beginning of the common leading whitespace if length > 0. */ static Py_ssize_t @@ -14389,7 +14389,7 @@ search_longest_common_leading_whitespace( } /* Dedent a string. - Behaviour is expected to be an exact match of textwrap.dedent. + Behaviour is expected to be an exact match of `textwrap.dedent`. Return a new reference on success, NULL with an exception set on error. */ PyObject * pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies: