Mirror of https://github.com/yt-dlp/yt-dlp.git
Synced 2024-11-16 01:25:06 +00:00
[utils] Place sanitize url function near other sanitizing functions
parent dc03a42537
commit 92a4793b3c
2 changed files with 28 additions and 31 deletions
test/test_utils.py

@@ -39,6 +39,7 @@
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    sanitize_url_path_consecutive_slashes,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -55,7 +56,6 @@
     xpath_with_ns,
     render_table,
     match_str,
-    url_sanitize_consecutive_slashes,
 )
 
 
@@ -169,6 +169,26 @@ def test_sanitize_path(self):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_url_path_consecutive_slashes(self):
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
+            'http://hostname/abc/')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
@@ -539,21 +559,6 @@ def test_match_str(self):
             'like_count > 100 & dislike_count <? 50 & description',
             {'like_count': 190, 'dislike_count': 10}))
 
-    def test_url_sanitize_consecutive_slashes(self):
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/foo//bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname//foo/bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname//'), 'http://hostname/')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/foo/bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/'), 'http://hostname/')
-
 
 if __name__ == '__main__':
     unittest.main()
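The renamed test can be exercised on its own with the standard unittest API. The sketch below is illustrative only: it assumes a checkout at this commit, with the test module importable as test.test_utils and the test case class named TestUtil (both assumptions, not shown in this diff).

# Hypothetical one-test runner; module path and the TestUtil class name are assumed.
import unittest

from test.test_utils import TestUtil  # assumed location of the test case

# Build a suite containing only the renamed test and run it verbosely.
suite = unittest.TestSuite([TestUtil('test_sanitize_url_path_consecutive_slashes')])
unittest.TextTestRunner(verbosity=2).run(suite)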
youtube_dl/utils.py

@@ -326,6 +326,13 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
+def sanitize_url_path_consecutive_slashes(url):
+    """Collapses consecutive slashes in URLs' path"""
+    parsed_url = list(compat_urlparse.urlparse(url))
+    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+    return compat_urlparse.urlunparse(parsed_url)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -1804,18 +1811,3 @@ def proxy_open(self, req, proxy, type):
             return None  # No Proxy
         return compat_urllib_request.ProxyHandler.proxy_open(
             self, req, proxy, type)
-
-
-def url_sanitize_consecutive_slashes(url):
-    """Sanitize URLs with consecutive slashes
-
-    For example, transform both
-        http://hostname/foo//bar/filename.html
-    and
-        http://hostname//foo/bar/filename.html
-    into
-        http://hostname/foo/bar/filename.html
-    """
-    parsed_url = list(compat_urlparse.urlparse(url))
-    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
-    return compat_urlparse.urlunparse(parsed_url)
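As a rough standalone illustration (not part of the commit), the same path-collapsing behaviour can be reproduced with the Python 3 standard library, where urllib.parse plays the role of compat_urlparse; the function name collapse_consecutive_path_slashes below is hypothetical.

import re
from urllib.parse import urlparse, urlunparse


def collapse_consecutive_path_slashes(url):
    # Split the URL into its six components, squeeze runs of '/' in the
    # path component only, and reassemble; scheme, netloc, query and
    # fragment are left untouched, mirroring the helper in the diff above.
    parts = list(urlparse(url))
    parts[2] = re.sub(r'/{2,}', '/', parts[2])
    return urlunparse(parts)


print(collapse_consecutive_path_slashes('http://hostname//foo//bar/filename.html'))
# -> http://hostname/foo/bar/filename.html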