pFad - Phone/Frame/Anonymizer/Declutterfier! Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

URL: http://github.com/python/cpython/commit/20a2c6482e28a2ca8d257ba646f2b8ead4837387

#23144: merge with 3.4. · python/cpython@20a2c64 · GitHub
Skip to content

Commit 20a2c64

Browse files
committed
#23144: merge with 3.4.
2 parents 2ba3980 + 6f2bb98 commit 20a2c64

File tree

3 files changed

+26
-5
lines changed

3 files changed

+26
-5
lines changed

Lib/html/parser.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,15 @@ def goahead(self, end):
139139
if self.convert_charrefs and not self.cdata_elem:
140140
j = rawdata.find('<', i)
141141
if j < 0:
142-
if not end:
142+
# if we can't find the next <, either we are at the end
143+
# or there's more text incoming. If the latter is True,
144+
# we can't pass the text to handle_data in case we have
145+
# a charref cut in half at end. Try to determine if
146+
# this is the case before proceeding by looking for an
147+
# & near the end and see if it's followed by a space or ;.
148+
amppos = rawdata.rfind('&', max(i, n-34))
149+
if (amppos >= 0 and
150+
not re.compile(r'[\s;]').search(rawdata, amppos)):
143151
break # wait till we get all the text
144152
j = n
145153
else:

Lib/test/test_htmlparser.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,6 @@ def handle_starttag(self, tag, attrs):
7272

7373
class EventCollectorCharrefs(EventCollector):
7474

75-
def get_events(self):
76-
return self.events
77-
7875
def handle_charref(self, data):
7976
self.fail('This should never be called with convert_charrefs=True')
8077

@@ -633,6 +630,18 @@ def test_broken_condcoms(self):
633630
]
634631
self._run_check(html, expected)
635632

633+
def test_convert_charrefs_dropped_text(self):
634+
# #23144: make sure that all the events are triggered when
635+
# convert_charrefs is True, even if we don't call .close()
636+
parser = EventCollector(convert_charrefs=True)
637+
# before the fix, bar & baz was missing
638+
parser.feed("foo <a>link</a> bar &amp; baz")
639+
self.assertEqual(
640+
parser.get_events(),
641+
[('data', 'foo '), ('starttag', 'a', []), ('data', 'link'),
642+
('endtag', 'a'), ('data', ' bar & baz')]
643+
)
644+
636645

637646
class AttributesTestCase(TestCaseBase):
638647

Misc/NEWS

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
+++++++++++
1+
+++++++++++
22
Python News
33
+++++++++++
44

@@ -95,9 +95,13 @@ Core and Builtins
9595
Library
9696
-------
9797

98+
- Issue #23144: Make sure that HTMLParser.feed() returns all the data, even
99+
when convert_charrefs is True.
100+
98101
- Issue #24635: Fixed a bug in typing.py where isinstance([], typing.Iterable)
99102
would return True once, then False on subsequent calls.
100103

104+
101105
- Issue #24989: Fixed buffer overread in BytesIO.readline() if a position is
102106
set beyond size. Based on patch by John Leitch.
103107

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad © 2024 Your Company Name. All rights reserved.





Check this box to remove all script contents from the fetched content.



Check this box to remove all images from the fetched content.


Check this box to remove all CSS styles from the fetched content.


Check this box to keep images inefficiently compressed and original size.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy