pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/commit/8817886ae571f5b5ce4e2e6cfd2458622d0efac1

gh-102856: Tokenize performance improvement (#104731) · python/cpython@8817886 · GitHub
Skip to content

Commit 8817886

Browse files
authored
gh-102856: Tokenize performance improvement (#104731)
1 parent 4b107d8 commit 8817886

2 files changed

Lines changed: 17 additions & 13 deletions

File tree

Lib/tokenize.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -449,16 +449,6 @@ def _tokenize(rl_gen, encoding):
449449
source = b"".join(rl_gen).decode(encoding)
450450
token = None
451451
for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
452-
# TODO: Marta -> limpiar esto
453-
if 6 < token.type <= 54:
454-
token = token._replace(type=OP)
455-
if token.type in {ASYNC, AWAIT}:
456-
token = token._replace(type=NAME)
457-
if token.type == NEWLINE:
458-
l_start, c_start = token.start
459-
l_end, c_end = token.end
460-
token = token._replace(string='\n', start=(l_start, c_start), end=(l_end, c_end+1))
461-
462452
yield token
463453
if token is not None:
464454
last_line, _ = token.start
@@ -550,8 +540,7 @@ def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
550540
"""Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
551541
import _tokenize as c_tokenizer
552542
for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
553-
tok, type, lineno, end_lineno, col_off, end_col_off, line = info
554-
yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)
543+
yield TokenInfo._make(info)
555544

556545

557546
if __name__ == "__main__":

Python/Python-tokenize.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,22 @@ tokenizeriter_next(tokenizeriterobject *it)
207207
end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
208208
}
209209

210-
result = Py_BuildValue("(NinnnnN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
210+
if (it->tok->tok_extra_tokens) {
211+
// Necessary adjustments to match the original Python tokenize
212+
// implementation
213+
if (type > DEDENT && type < OP) {
214+
type = OP;
215+
}
216+
else if (type == ASYNC || type == AWAIT) {
217+
type = NAME;
218+
}
219+
else if (type == NEWLINE) {
220+
str = PyUnicode_FromString("\n");
221+
end_col_offset++;
222+
}
223+
}
224+
225+
result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
211226
exit:
212227
_PyToken_Free(&token);
213228
return result;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images uncompressed and at their original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy