mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-25 02:45:12 +00:00
[utils] Improve cookie files support
+ Add support for UTF-8 in cookie files * Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at)
This commit is contained in:
parent
f7f304910d
commit
c380cc28c4
3 changed files with 93 additions and 5 deletions
|
@ -39,6 +39,13 @@ def assert_cookie_has_value(key):
|
||||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
|
def test_malformed_cookies(self):
    jar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
    jar.load(ignore_expires=True, ignore_discard=True)
    # Every entry in the fixture file is malformed and is expected to be
    # ignored by the loader, so the jar must end up holding no cookies
    self.assertFalse(jar._cookies)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
# Cookie file entry with invalid number of fields - 6 instead of 7
|
||||||
|
www.foobar.foobar FALSE / FALSE 0 COOKIE
|
||||||
|
|
||||||
|
# Cookie file entry with invalid expires at
|
||||||
|
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
|
|
@ -7,6 +7,7 @@
|
||||||
import binascii
|
import binascii
|
||||||
import calendar
|
import calendar
|
||||||
import codecs
|
import codecs
|
||||||
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import ctypes
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
|
@ -30,6 +31,7 @@
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
import zlib
|
import zlib
|
||||||
|
@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
||||||
1. https://curl.haxx.se/docs/http-cookies.html
|
1. https://curl.haxx.se/docs/http-cookies.html
|
||||||
"""
|
"""
|
||||||
_HTTPONLY_PREFIX = '#HttpOnly_'
|
_HTTPONLY_PREFIX = '#HttpOnly_'
|
||||||
|
_ENTRY_LEN = 7
|
||||||
|
_HEADER = '''# Netscape HTTP Cookie File
|
||||||
|
# This file is generated by youtube-dl. Do not edit.
|
||||||
|
|
||||||
|
'''
|
||||||
|
_CookieFileEntry = collections.namedtuple(
|
||||||
|
'CookieFileEntry',
|
||||||
|
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
|
||||||
|
|
||||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """
    Write the cookies of this jar to a Netscape-format cookie file.

    The body follows CPython 3.8's MozillaCookieJar.save, adapted so that
    the file is always written as UTF-8 on both python 2 and 3.

    filename falls back to self.filename when omitted; ValueError is
    raised if neither is set. Cookies flagged as discard are skipped
    unless ignore_discard is true, and expired cookies are skipped unless
    ignore_expires is true.
    """
    if filename is None:
        filename = self.filename
        if filename is None:
            raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

    # Session cookies are stored with `expires` set to 0 rather than an
    # empty string; note that this updates the cookie objects in the jar
    for cookie in self:
        if cookie.expires is None:
            cookie.expires = 0

    with io.open(filename, 'w', encoding='utf-8') as out:
        out.write(self._HEADER)
        timestamp = time.time()
        for cookie in self:
            if cookie.discard and not ignore_discard:
                continue
            if not ignore_expires and cookie.is_expired(timestamp):
                continue
            secure = 'TRUE' if cookie.secure else 'FALSE'
            initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
            expires = '' if cookie.expires is None else compat_str(cookie.expires)
            if cookie.value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', cookie.name
            else:
                name, value = cookie.name, cookie.value
            fields = (cookie.domain, initial_dot, cookie.path,
                      secure, expires, name, value)
            out.write('\t'.join(fields) + '\n')
|
||||||
|
|
||||||
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||||
"""Load cookies from a file."""
|
"""Load cookies from a file."""
|
||||||
|
@ -2752,12 +2806,30 @@ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||||
else:
|
else:
|
||||||
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
|
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
|
||||||
|
|
||||||
|
def prepare_line(line):
    """
    Validate a single cookie file line before it is handed to the parser.

    A leading `#HttpOnly_` prefix is stripped. Comment lines and blank
    lines are returned as they are. Any other line must consist of
    exactly `_ENTRY_LEN` tab-separated fields, and its `expires_at`
    field must be either empty or made up of ASCII digits only.

    Returns the (possibly prefix-stripped) line.
    Raises compat_cookiejar.LoadError describing the problem when the
    entry is malformed, so that the caller can skip just this entry.
    """
    if line.startswith(self._HTTPONLY_PREFIX):
        line = line[len(self._HTTPONLY_PREFIX):]
    # comments and empty lines are fine
    if line.startswith('#') or not line.strip():
        return line
    cookie_list = line.split('\t')
    if len(cookie_list) != self._ENTRY_LEN:
        raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
    cookie = self._CookieFileEntry(*cookie_list)
    # str.isdigit() is also true for non-ASCII digit characters (e.g.
    # superscript digits) which cannot be converted to a number, so only
    # plain 0-9 is accepted here; otherwise such an entry would pass this
    # check and fail later instead of being skipped
    if cookie.expires_at and not all(ch in '0123456789' for ch in cookie.expires_at):
        raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
    return line
|
||||||
|
|
||||||
cf = io.StringIO()
|
cf = io.StringIO()
|
||||||
with open(filename) as f:
|
with io.open(filename, encoding='utf-8') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
if line.startswith(self._HTTPONLY_PREFIX):
|
try:
|
||||||
line = line[len(self._HTTPONLY_PREFIX):]
|
cf.write(prepare_line(line))
|
||||||
cf.write(compat_str(line))
|
except compat_cookiejar.LoadError as e:
|
||||||
|
write_string(
|
||||||
|
'WARNING: skipping cookie file entry due to %s: %r\n'
|
||||||
|
% (e, line), sys.stderr)
|
||||||
|
continue
|
||||||
cf.seek(0)
|
cf.seek(0)
|
||||||
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
||||||
# Session cookies are denoted by either `expires` field set to
|
# Session cookies are denoted by either `expires` field set to
|
||||||
|
|
Loading…
Reference in a new issue