From a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 4 Aug 2015 20:44:22 +0600 Subject: [PATCH] [bbc] Add support for vxp-playlist-data embeds (Closes #6453) --- youtube_dl/extractor/bbc.py | 45 ++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 9a1b6e3dc..abc5a44a1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -526,6 +526,18 @@ class BBCIE(BBCCoUkIE): 'params': { 'skip_download': True, } + }, { + # single video from video playlist embedded with vxp-playlist-data JSON + 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376', + 'info_dict': { + 'id': 'p02w6qjc', + 'ext': 'mp4', + 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', + 'duration': 56, + }, + 'params': { + 'skip_download': True, + } }, { # single video story with digitalData 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret', @@ -695,13 +707,36 @@ def extract_all(pattern): if not medias: # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international) - media_asset_page = self._parse_json( + media_asset = self._search_regex( + r'mediaAssetPage\.init\(\s*({.+?}), "/', + webpage, 'media asset', default=None) + if media_asset: + media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) + medias = [] + for video in media_asset_page.get('videos', {}).values(): + medias.extend(video.values()) + + if not medias: + # Multiple video playlist with single `now playing` entry (e.g. + # http://www.bbc.com/news/video_and_audio/must_see/33767813) + vxp_playlist = self._parse_json( self._search_regex( - r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'), + r']+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)', + webpage, 'playlist data'), playlist_id) - medias = [] - for video in media_asset_page.get('videos', {}).values(): - medias.extend(video.values()) + playlist_medias = [] + for item in vxp_playlist: + media = item.get('media') + if not media: + continue + playlist_medias.append(media) + # Download single video if found media with asset id matching the video id from URL + if item.get('advert', {}).get('assetId') == playlist_id: + medias = [media] + break + # Fallback to the whole playlist + if not medias: + medias = playlist_medias entries = [] for num, media_meta in enumerate(medias, start=1):