From 061f62da54cb4184a039108e40dee8e9eb2611c1 Mon Sep 17 00:00:00 2001 From: ping Date: Thu, 20 Aug 2015 12:56:11 +0800 Subject: [PATCH 1/4] [vlive] New extractor for vlive.tv --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vlive.py | 94 ++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 youtube_dl/extractor/vlive.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1c53a5632..6bee5b63c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -735,6 +735,7 @@ VKIE, VKUserVideosIE, ) +from .vlive import VLiveIE from .vodlocker import VodlockerIE from .voicerepublic import VoiceRepublicIE from .vporn import VpornIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py new file mode 100644 index 000000000..b3bbd80fb --- /dev/null +++ b/youtube_dl/extractor/vlive.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hmac +from hashlib import sha1 +from base64 import b64encode +from time import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext +) +from ..compat import compat_urllib_parse + + +class VLiveIE(InfoExtractor): + IE_NAME = 'vlive' + _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://m.vlive.tv/video/1326', + 'md5': 'cc7314812855ce56de70a06a27314983', + 'info_dict': { + 'id': '1326', + 'ext': 'mp4', + 'title': '[V] Girl\'s Day\'s Broadcast', + 'creator': 'Girl\'s Day', + 'upload_date': '20150817', + }, + } + _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://m.vlive.tv/video/%s' % video_id, + video_id, note='Download video page') + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + creator = self._html_search_regex( + r'([^<>]+)', webpage, 'creator') + upload_date = self._html_search_regex( + r'(\d{4}\.\d{2}\.\d{2})', webpage, + 'upload date', default=None, fatal=False) + if upload_date: + upload_date = upload_date.replace('.', '') + + url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id + msgpad = {'msgpad': '%.0f' % (time() * 1000)} + md = { + 'md': b64encode( + hmac.new(self._SECRET.encode('ascii'), + (url[:255] + msgpad['msgpad']).encode('ascii'), sha1).digest()) + } + url += '&' + compat_urllib_parse.urlencode(msgpad) + '&' + compat_urllib_parse.urlencode(md) + + playinfo = self._download_json(url, video_id, 'Downloading video json') + + if playinfo.get('message', '') != 'success': + raise ExtractorError(playinfo['message']) + + if not playinfo.get('result'): + raise ExtractorError('No videos found.') + + formats = [] + for vid in playinfo['result'].get('videos', {}).get('list', []): + formats.append({ + 'url': vid['source'], + 'ext': 'mp4', + 'abr': vid.get('bitrate', {}).get('audio'), + 'vbr': vid.get('bitrate', {}).get('video'), + 'format_id': vid['encodingOption']['name'], + 'height': vid.get('height'), + 'width': vid.get('width'), + }) + self._sort_formats(formats) + + subtitles = {} + for caption in playinfo['result'].get('captions', {}).get('list', []): + subtitles[caption['language']] = [ + {'ext': determine_ext(caption['source'], default_ext='vtt'), + 'url': caption['source']}] + + return { + 'id': video_id, + 'title': title, + 'creator': creator, + 'thumbnail': thumbnail, + 'formats': formats, + 'upload_date': upload_date, + 'subtitles': subtitles, + } From eba470f2f22389ab32164e4eb39067ceecf900f5 Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 24 Aug 2015 16:30:00 +0800 Subject: [PATCH 2/4] [vlive] Remove upload_date extraction & cleanup --- youtube_dl/extractor/vlive.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index b3bbd80fb..6a403cc64 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -25,7 +25,6 @@ class VLiveIE(InfoExtractor): 'ext': 'mp4', 'title': '[V] Girl\'s Day\'s Broadcast', 'creator': 'Girl\'s Day', - 'upload_date': '20150817', }, } _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH' @@ -41,21 +40,14 @@ def _real_extract(self, url): thumbnail = self._og_search_thumbnail(webpage) creator = self._html_search_regex( r'([^<>]+)', webpage, 'creator') - upload_date = self._html_search_regex( - r'(\d{4}\.\d{2}\.\d{2})', webpage, - 'upload date', default=None, fatal=False) - if upload_date: - upload_date = upload_date.replace('.', '') - + url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id - msgpad = {'msgpad': '%.0f' % (time() * 1000)} - md = { - 'md': b64encode( - hmac.new(self._SECRET.encode('ascii'), - (url[:255] + msgpad['msgpad']).encode('ascii'), sha1).digest()) - } - url += '&' + compat_urllib_parse.urlencode(msgpad) + '&' + compat_urllib_parse.urlencode(md) - + msgpad = '%.0f' % (time() * 1000) + md = b64encode( + hmac.new(self._SECRET.encode('ascii'), + (url[:255] + msgpad).encode('ascii'), sha1).digest() + ) + url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md}) playinfo = self._download_json(url, video_id, 'Downloading video json') if playinfo.get('message', '') != 'success': @@ -89,6 +81,5 @@ def _real_extract(self, url): 'creator': creator, 'thumbnail': thumbnail, 'formats': formats, - 'upload_date': upload_date, 'subtitles': subtitles, } From 615f155a3afd5c854fbf855c02f17ddb7f217b05 Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 31 Aug 2015 12:46:28 +0800 Subject: [PATCH 3/4] [vlive] Fixes for review --- youtube_dl/extractor/vlive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 6a403cc64..17ae3b964 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -39,7 +39,7 @@ def _real_extract(self, url): title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) creator = self._html_search_regex( - r'([^<>]+)', webpage, 'creator') + r']+class="name">([^<>]+)', webpage, 'creator') url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id msgpad = '%.0f' % (time() * 1000) @@ -51,7 +51,7 @@ def _real_extract(self, url): playinfo = self._download_json(url, video_id, 'Downloading video json') if playinfo.get('message', '') != 'success': - raise ExtractorError(playinfo['message']) + raise ExtractorError(playinfo.get('message', 'JSON request unsuccessful')) if not playinfo.get('result'): raise ExtractorError('No videos found.') From d14f0c45fc8ab447d66caf7a3b9ce0888446eb7c Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 1 Sep 2015 11:11:14 +0800 Subject: [PATCH 4/4] [vlive] Add info about www. to m. redirects for mobile --- youtube_dl/extractor/vlive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 17ae3b964..a456f8217 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -16,6 +16,7 @@ class VLiveIE(InfoExtractor): IE_NAME = 'vlive' + # www.vlive.tv/video/ links redirect to m.vlive.tv/video/ for mobile devices _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P[0-9]+)' _TEST = { 'url': 'http://m.vlive.tv/video/1326',