Add the 'webpage_url' field to info_dict

The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-11-03 12:11:13 +01:00
parent b6c45014ae
commit 9103bbc5cd
5 changed files with 24 additions and 11 deletions

View File

@ -148,6 +148,9 @@ def try_rm_tcs_files():
# Check for the presence of mandatory fields # Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'): for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key]) self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally: finally:
try_rm_tcs_files() try_rm_tcs_files()

View File

@ -354,8 +354,11 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'_type': 'compat_list', '_type': 'compat_list',
'entries': ie_result, 'entries': ie_result,
} }
if 'extractor' not in ie_result: self.add_extra_info(ie_result,
ie_result['extractor'] = ie.IE_NAME {
'extractor': ie.IE_NAME,
'webpage_url': url
})
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(de), de.format_traceback())
@ -417,6 +420,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
'playlist': playlist, 'playlist': playlist,
'playlist_index': i + playliststart, 'playlist_index': i + playliststart,
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
} }
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,
@ -427,7 +431,10 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
elif result_type == 'compat_list': elif result_type == 'compat_list':
def _fixup(r): def _fixup(r):
self.add_extra_info(r, self.add_extra_info(r,
{'extractor': ie_result['extractor']}) {
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
})
return r return r
ie_result['entries'] = [ ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info) self.process_ie_result(_fixup(r), download, extra_info)

View File

@ -71,6 +71,9 @@ class InfoExtractor(object):
("3D" or "DASH video") ("3D" or "DASH video")
* width Width of the video, if known * width Width of the video, if known
* height Height of the video, if known * height Height of the video, if known
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.

View File

@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo' _NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo' IE_NAME = u'vimeo'
_TESTS = [ _TESTS = [
@ -128,11 +128,9 @@ def _real_extract(self, url, new_video=True):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id') video_id = mobj.group('id')
if not mobj.group('proto'): if mobj.group('pro') or mobj.group('player'):
url = 'https://' + url
elif mobj.group('pro'):
url = 'http://player.vimeo.com/video/' + video_id url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'): else:
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
@ -234,7 +232,7 @@ def _real_extract(self, url, new_video=True):
if len(formats) == 0: if len(formats) == 0:
raise ExtractorError(u'No known codec found') raise ExtractorError(u'No known codec found')
return [{ return {
'id': video_id, 'id': video_id,
'uploader': video_uploader, 'uploader': video_uploader,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
@ -243,7 +241,8 @@ def _real_extract(self, url, new_video=True):
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': video_description, 'description': video_description,
'formats': formats, 'formats': formats,
}] 'webpage_url': url,
}
class VimeoChannelIE(InfoExtractor): class VimeoChannelIE(InfoExtractor):

View File

@ -1485,7 +1485,8 @@ def _real_extract(self, url):
'subtitles': video_subtitles, 'subtitles': video_subtitles,
'duration': video_duration, 'duration': video_duration,
'age_limit': 18 if age_gate else 0, 'age_limit': 18 if age_gate else 0,
'annotations': video_annotations 'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
}) })
return results return results