mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-05 06:21:01 +00:00
Add webpage_url_basename info_dict field (Fixes #1938)
This commit is contained in:
parent
44c471c3b8
commit
29eb517403
3 changed files with 27 additions and 9 deletions
|
@ -13,20 +13,21 @@
|
||||||
|
|
||||||
#from youtube_dl.utils import htmlentity_transform
|
#from youtube_dl.utils import htmlentity_transform
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
timeconvert,
|
|
||||||
sanitize_filename,
|
|
||||||
unescapeHTML,
|
|
||||||
orderedSet,
|
|
||||||
DateRange,
|
DateRange,
|
||||||
unified_strdate,
|
encodeFilename,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
get_meta_content,
|
get_meta_content,
|
||||||
xpath_with_ns,
|
orderedSet,
|
||||||
smuggle_url,
|
sanitize_filename,
|
||||||
unsmuggle_url,
|
|
||||||
shell_quote,
|
shell_quote,
|
||||||
encodeFilename,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
timeconvert,
|
||||||
|
unescapeHTML,
|
||||||
|
unified_strdate,
|
||||||
|
unsmuggle_url,
|
||||||
|
url_basename,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
|
@ -181,6 +182,12 @@ def test_str_to_int(self):
|
||||||
self.assertEqual(str_to_int('123,456'), 123456)
|
self.assertEqual(str_to_int('123,456'), 123456)
|
||||||
self.assertEqual(str_to_int('123.456'), 123456)
|
self.assertEqual(str_to_int('123.456'), 123456)
|
||||||
|
|
||||||
|
def test_url_basename(self):
    """Check url_basename() on a root URL, a nested path, and paths
    followed by a query string, a fragment or a trailing slash."""
    # (input URL, expected basename) pairs, checked in order.
    expectations = (
        (u'http://foo.de/', u''),
        (u'http://foo.de/bar/baz', u'baz'),
        (u'http://foo.de/bar/baz?x=y', u'baz'),
        (u'http://foo.de/bar/baz#x=y', u'baz'),
        (u'http://foo.de/bar/baz/', u'baz'),
    )
    for candidate, expected in expectations:
        self.assertEqual(url_basename(candidate), expected)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -47,6 +47,7 @@
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
takewhile_inclusive,
|
takewhile_inclusive,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
|
url_basename,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
|
@ -484,6 +485,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
{
|
{
|
||||||
'extractor': ie.IE_NAME,
|
'extractor': ie.IE_NAME,
|
||||||
'webpage_url': url,
|
'webpage_url': url,
|
||||||
|
'webpage_url_basename': url_basename(url),
|
||||||
'extractor_key': ie.ie_key(),
|
'extractor_key': ie.ie_key(),
|
||||||
})
|
})
|
||||||
if process:
|
if process:
|
||||||
|
@ -576,6 +578,7 @@ def make_result(embedded_info):
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': i + playliststart,
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -596,6 +599,7 @@ def _fixup(r):
|
||||||
{
|
{
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
})
|
})
|
||||||
return r
|
return r
|
||||||
|
|
|
@ -1084,3 +1084,10 @@ def remove_start(s, start):
|
||||||
if s.startswith(start):
|
if s.startswith(start):
|
||||||
return s[len(start):]
|
return s[len(start):]
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def url_basename(url):
    """Return the last path component of *url*.

    The URL must start with ``http://``, ``https://`` or a scheme-relative
    ``//`` followed by a host.  A single trailing slash as well as any query
    string or fragment are ignored.  An empty (unicode) string is returned
    when the URL has no path component or does not look like a URL at all.

    Examples:
        http://foo.de/            -> u''
        http://foo.de/bar/baz?x=y -> u'baz'
        http://foo.de/a/b/c/      -> u'c'
    """
    # `(?:[^/?#]+/)*` skips any number of leading directory components; the
    # previous `?` quantifier allowed at most one, so URLs with paths deeper
    # than two levels failed to match and yielded an empty basename.
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)
|
||||||
|
|
Loading…
Reference in a new issue