pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/commit/a135a6d2c6d503b186695f01efa7eed65611b04e

gh-112943: Correctly compute end offsets for multiline tokens in the tokenize module (#112949) · python/cpython@a135a6d · GitHub
Skip to content

Commit a135a6d

Browse files
authored
gh-112943: Correctly compute end offsets for multiline tokens in the tokenize module (#112949)
1 parent 4c5b9c1 commit a135a6d

5 files changed

Lines changed: 25 additions & 6 deletions

File tree

Lib/test/test_tokenize.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,16 @@ def test_string(self):
615615
OP '}' (3, 0) (3, 1)
616616
FSTRING_MIDDLE '__' (3, 1) (3, 3)
617617
FSTRING_END "'" (3, 3) (3, 4)
618+
""")
619+
620+
self.check_tokenize("""\
621+
'''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli
622+
aktualni pracownicy, obecni pracownicy'''
623+
""", """\
624+
INDENT ' ' (1, 0) (1, 4)
625+
STRING "'''Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli\\n aktualni pracownicy, obecni pracownicy'''" (1, 4) (2, 45)
626+
NEWLINE '\\n' (2, 45) (2, 46)
627+
DEDENT '' (3, 0) (3, 0)
618628
""")
619629

620630
def test_function(self):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Correctly compute end column offsets for multiline tokens in the
2+
:mod:`tokenize` module. Patch by Pablo Galindo

Parser/pegen.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,8 @@ _PyPegen_interactive_exit(Parser *p)
1919
}
2020

2121
Py_ssize_t
22-
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
22+
_PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset)
2323
{
24-
const char *str = PyUnicode_AsUTF8(line);
25-
if (!str) {
26-
return -1;
27-
}
2824
Py_ssize_t len = strlen(str);
2925
if (col_offset > len + 1) {
3026
col_offset = len + 1;
@@ -39,6 +35,16 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
3935
return size;
4036
}
4137

38+
Py_ssize_t
39+
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
40+
{
41+
const char *str = PyUnicode_AsUTF8(line);
42+
if (!str) {
43+
return -1;
44+
}
45+
return _PyPegen_byte_offset_to_character_offset_raw(str, col_offset);
46+
}
47+
4248
// Here, mark is the start of the node, while p->mark is the end.
4349
// If node==NULL, they should be the same.
4450
int

Parser/pegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ expr_ty _PyPegen_name_token(Parser *p);
149149
expr_ty _PyPegen_number_token(Parser *p);
150150
void *_PyPegen_string_token(Parser *p);
151151
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
152+
Py_ssize_t _PyPegen_byte_offset_to_character_offset_raw(const char*, Py_ssize_t col_offset);
152153

153154
// Error handling functions and APIs
154155
typedef enum {

Python/Python-tokenize.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ tokenizeriter_next(tokenizeriterobject *it)
225225
col_offset = _PyPegen_byte_offset_to_character_offset(line, token.start - line_start);
226226
}
227227
if (token.end != NULL && token.end >= it->tok->line_start) {
228-
end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
228+
end_col_offset = _PyPegen_byte_offset_to_character_offset_raw(it->tok->line_start, token.end - it->tok->line_start);
229229
}
230230

231231
if (it->tok->tok_extra_tokens) {

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy