0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-22 02:15:12 +00:00

[parsing] support comment end tag '--!>' as suggested by github-advanced-security bot

This commit is contained in:
flashdagger 2023-11-13 07:14:14 +01:00
parent c34166d7c8
commit a91d9e1084
No known key found for this signature in database
GPG key ID: 0A82E44224B28001
2 changed files with 3 additions and 3 deletions

View file

@@ -343,7 +343,7 @@ def mark_comments(_string, char='^', nochar='-'):
-----------^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------- -----------^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------
this is a leftover comment --> <!-- a new comment without closing this is a leftover comment --> <!-- a new comment without closing
^^^^^^^^^^^^^^^^^^^^^^^^^^^------------^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^------------^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
here is <!-- a comment --> and <!-- another comment --> end here is <!-- a comment --> and <!-- another comment --!> end
----------------^^^^^^^^^^^----------------^^^^^^^^^^^^^^^^^--------- ----------------^^^^^^^^^^^----------------^^^^^^^^^^^^^^^^^---------
<script> ignore here </script> <SCRIPT> and here </SCRIPT> <script> ignore here </script> <SCRIPT> and here </SCRIPT>
--------^^^^^^^^^^^^^-----------------------------^^^^^^^^^^--------- --------^^^^^^^^^^^^^-----------------------------^^^^^^^^^^---------

View file

@@ -20,7 +20,7 @@ class HTMLIgnoreRanges:
if offset in ranges: if offset in ranges:
... ...
""" """
REGEX = re.compile(r'<!--|-->|</?\s*(?:script|style)\b[^>]*>', flags=re.IGNORECASE) REGEX = re.compile(r'<!--|--!?>|</?\s*(?:script|style)\b[^>]*>', flags=re.IGNORECASE)
def __init__(self, html): def __init__(self, html):
self.html = html self.html = html
@@ -40,7 +40,7 @@ def __contains__(self, offset):
if self._last_match is None: if self._last_match is None:
return False return False
match_string = self._last_match.group() match_string = self._last_match.group()
if match_string.startswith('</') or match_string == '-->': if match_string.startswith('</') or match_string in ('-->', '--!>'):
return offset < self._last_match.start() return offset < self._last_match.start()
return offset >= self._last_match.end() return offset >= self._last_match.end()