mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-16 01:25:06 +00:00
[utils] Add a function to sanitize consecutive slashes in URLs
This commit is contained in:
parent
5c7495a194
commit
55969016e9
3 changed files with 34 additions and 5 deletions
|
@ -54,6 +54,7 @@
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
render_table,
|
render_table,
|
||||||
match_str,
|
match_str,
|
||||||
|
url_sanitize_consecutive_slashes,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -501,6 +502,21 @@ def test_match_str(self):
|
||||||
'like_count > 100 & dislike_count <? 50 & description',
|
'like_count > 100 & dislike_count <? 50 & description',
|
||||||
{'like_count': 190, 'dislike_count': 10}))
|
{'like_count': 190, 'dislike_count': 10}))
|
||||||
|
|
||||||
|
def test_url_sanitize_consecutive_slashes(self):
    # Each entry is (input URL, expected result). The first three contain
    # doubled slashes in the path; the last two are already clean and are
    # expected to come back unchanged.
    cases = (
        ('http://hostname/foo//bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname//', 'http://hostname/'),
        ('http://hostname/foo/bar/filename.html',
         'http://hostname/foo/bar/filename.html'),
        ('http://hostname/', 'http://hostname/'),
    )
    for raw_url, expected in cases:
        self.assertEqual(url_sanitize_consecutive_slashes(raw_url), expected)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_request
|
compat_urllib_request
|
||||||
)
|
)
|
||||||
|
from ..utils import url_sanitize_consecutive_slashes
|
||||||
|
|
||||||
|
|
||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
|
@ -105,11 +106,8 @@ def _fetch_data(vid_id, mytv=False):
|
||||||
|
|
||||||
part_info = part_str.split('|')
|
part_info = part_str.split('|')
|
||||||
|
|
||||||
# Sanitize URL to prevent download failure
|
video_url = url_sanitize_consecutive_slashes(
|
||||||
if part_info[0][-1] == '/' and su[i][0] == '/':
|
'%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
|
||||||
su[i] = su[i][1:]
|
|
||||||
|
|
||||||
video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
|
|
||||||
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -1789,3 +1789,18 @@ def proxy_open(self, req, proxy, type):
|
||||||
return None # No Proxy
|
return None # No Proxy
|
||||||
return compat_urllib_request.ProxyHandler.proxy_open(
|
return compat_urllib_request.ProxyHandler.proxy_open(
|
||||||
self, req, proxy, type)
|
self, req, proxy, type)
|
||||||
|
|
||||||
|
|
||||||
|
def url_sanitize_consecutive_slashes(url):
    """Collapse runs of consecutive slashes in the path of a URL

    Only the path component is rewritten; the other parsed components
    are reassembled as they were parsed. For example, both
        http://hostname/foo//bar/filename.html
    and
        http://hostname//foo/bar/filename.html
    become
        http://hostname/foo/bar/filename.html
    """
    components = compat_urlparse.urlparse(url)
    # Squash every run of two or more slashes in the path down to one.
    collapsed_path = re.sub(r'/{2,}', '/', components.path)
    return compat_urlparse.urlunparse(
        components._replace(path=collapsed_path))
|
||||||
|
|
Loading…
Reference in a new issue