From 19b9de13c4e83416f09708dddecba6edc69b4525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Jul 2018 23:28:44 +0700 Subject: [PATCH] [watchbox] Fix extraction (closes #17107) --- youtube_dl/extractor/watchbox.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dl/extractor/watchbox.py index d99313080..5a4e46e73 100644 --- a/youtube_dl/extractor/watchbox.py +++ b/youtube_dl/extractor/watchbox.py @@ -10,6 +10,7 @@ js_to_json, strip_or_none, try_get, + unescapeHTML, unified_timestamp, ) @@ -67,12 +68,20 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) - source = (self._parse_json( + player_config = self._parse_json( self._search_regex( - r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config', - default='{}'), - video_id, transform_source=js_to_json, - fatal=False) or {}).get('source') or {} + r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage, + 'player config', default='{}', group='data'), + video_id, transform_source=unescapeHTML, fatal=False) + + if not player_config: + player_config = self._parse_json( + self._search_regex( + r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) or {} + + source = player_config.get('source') or {} video_id = compat_str(source.get('videoId') or video_id)