diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a285ee7d8..2763d2ffe 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1061,7 +1061,7 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
-                              fatal=True):
+                              fatal=True, live=False):
 
         formats = [{
             'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
@@ -1139,7 +1139,11 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                 if m3u8_id:
                     format_id.append(m3u8_id)
                 last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
-                format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
+                # Bandwidth of live streams may differ over time thus making
+                # format_id unpredictable. So it's better to keep provided
+                # format_id intact.
+                if not live:
+                    format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
                 f = {
                     'format_id': '-'.join(format_id),
                     'url': format_url(line.strip()),
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index baf39bb2c..7f9e99ec2 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -1,9 +1,13 @@
 # coding: utf-8
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
+
+import re
+import time
 
 from .common import InfoExtractor
 from ..utils import (
     dict_get,
+    ExtractorError,
     float_or_none,
     int_or_none,
 )
@@ -31,16 +35,81 @@ def _real_extract(self, url):
         webpage = self._download_webpage(
             'http://www.vlive.tv/video/%s' % video_id, video_id)
 
-        long_video_id = self._search_regex(
-            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
-            webpage, 'long video id')
+        # UTC+x - UTC+9 (KST)
+        tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
+        tz_offset = -tz // 60 - 9 * 60
+        self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
 
-        key = self._search_regex(
-            r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
-            webpage, 'key')
+        status_params = self._download_json(
+            'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
+            video_id, 'Downloading JSON status',
+            headers={'Referer': url})
+        status = status_params.get('status')
+        air_start = status_params.get('onAirStartAt', '')
+        is_live = status_params.get('isLive')
 
+        video_params = self._search_regex(
+            r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
+            webpage, 'video params')
+        live_params, long_video_id, key = re.split(
+            r'"\s*,\s*"', video_params)[1:4]
+
+        if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
+            # live_params is a JSON document embedded as an escaped string
+            # literal: unescape it first, then parse the document itself.
+            live_params = self._parse_json('"%s"' % live_params, video_id)
+            live_params = self._parse_json(live_params, video_id)
+            return self._live(video_id, webpage, live_params)
+        elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
+            if long_video_id and key:
+                return self._replay(video_id, webpage, long_video_id, key)
+            elif is_live:
+                status = 'LIVE_END'
+            else:
+                status = 'COMING_SOON'
+
+        if status == 'LIVE_END':
+            raise ExtractorError('Uploading for replay. Please wait...',
+                                 expected=True)
+        elif status == 'COMING_SOON':
+            raise ExtractorError('Coming soon! %s' % air_start, expected=True)
+        elif status == 'CANCELED':
+            raise ExtractorError('We are sorry, '
+                                 'but the live broadcast has been canceled.',
+                                 expected=True)
+        else:
+            raise ExtractorError('Unknown status %s' % status)
+
+    def _get_common_fields(self, webpage):
+        """Return the title, creator and thumbnail scraped from webpage."""
         title = self._og_search_title(webpage)
+        creator = self._html_search_regex(
+            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
+            webpage, 'creator', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage)
+        return {
+            'title': title,
+            'creator': creator,
+            'thumbnail': thumbnail,
+        }
 
+    def _live(self, video_id, webpage, live_params):
+        """Return the info dict for an on-air broadcast (HLS formats)."""
+        formats = []
+        for vid in live_params.get('resolutions', []):
+            formats.extend(self._extract_m3u8_formats(
+                vid['cdnUrl'], video_id, 'mp4',
+                m3u8_id=vid.get('name'),
+                fatal=False, live=True))
+        self._sort_formats(formats)
+
+        return dict(self._get_common_fields(webpage),
+                    id=video_id,
+                    formats=formats,
+                    is_live=True)
+
+    def _replay(self, video_id, webpage, long_video_id, key):
+        """Return the info dict built from the Naver VOD play info API."""
         playinfo = self._download_json(
             'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
             % compat_urllib_parse_urlencode({
@@ -62,11 +131,6 @@ def _real_extract(self, url):
             } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
         self._sort_formats(formats)
 
-        thumbnail = self._og_search_thumbnail(webpage)
-        creator = self._html_search_regex(
-            r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
-            webpage, 'creator', fatal=False)
-
         view_count = int_or_none(playinfo.get('meta', {}).get('count'))
 
         subtitles = {}
@@ -77,12 +141,8 @@ def _real_extract(self, url):
                     'ext': 'vtt',
                     'url': caption['source']}]
 
-        return {
-            'id': video_id,
-            'title': title,
-            'creator': creator,
-            'thumbnail': thumbnail,
-            'view_count': view_count,
-            'formats': formats,
-            'subtitles': subtitles,
-        }
+        return dict(self._get_common_fields(webpage),
+                    id=video_id,
+                    formats=formats,
+                    view_count=view_count,
+                    subtitles=subtitles)