diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dl/extractor/melonvod.py index 9b00ce8e3..2c80b3ba8 100644 --- a/youtube_dl/extractor/melonvod.py +++ b/youtube_dl/extractor/melonvod.py @@ -2,18 +2,20 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + urljoin, +) class MelonVODIE(InfoExtractor): - _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?.*mvId=(?P<id>[0-9]+)' + _VALID_URL = r'https?://vod\.melon\.com/video/detail2\.html?\?.*?mvId=(?P<id>[0-9]+)' _TEST = { 'url': 'http://vod.melon.com/video/detail2.htm?mvId=50158734', - 'md5': '461fc04c6d23cbf49f4fef4d61851d32', 'info_dict': { 'id': '50158734', 'ext': 'mp4', - 'title': 'Jessica \'Wonderland\' MV Making Film', + 'title': "Jessica 'Wonderland' MV Making Film", 'thumbnail': 're:^https?://.*\.jpg$', 'artist': 'Jessica (제시카)', 'upload_date': '20161212', @@ -29,24 +31,35 @@ def _real_extract(self, url): play_info = self._download_json( 'http://vod.melon.com/video/playerInfo.json', video_id, - note='Downloading playerInfo', query={'mvId': video_id} - ) + note='Downloading player info JSON', query={'mvId': video_id}) + title = play_info['mvInfo']['MVTITLE'] - artist = ', '.join([artist['ARTISTNAMEWEBLIST'] for artist in play_info.get('artistList', [])]) info = self._download_json( 'http://vod.melon.com/delivery/streamingInfo.json', video_id, - note='Downloading streamingInfo', - query={'contsId': video_id, 'contsType': 'VIDEO'} - ) - stream_info = info.get('streamingInfo', {}) - m3u8_url = stream_info.get('encUrl') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') + note='Downloading streaming info JSON', + query={ + 'contsId': video_id, + 'contsType': 'VIDEO', + }) + + stream_info = info['streamingInfo'] + + formats = self._extract_m3u8_formats( + stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls') self._sort_formats(formats) - thumbnail = info.get('staticDomain', '') 
+ stream_info.get('imgPath', '') + artist_list = play_info.get('artistList') + artist = None + if isinstance(artist_list, list): + artist = ', '.join( + [a['ARTISTNAMEWEBLIST'] + for a in artist_list if a.get('ARTISTNAMEWEBLIST')]) + + thumbnail = urljoin(info.get('staticDomain'), stream_info.get('imgPath')) + duration = int_or_none(stream_info.get('playTime')) - upload_date = stream_info.get('mvSvcOpenDt', '')[:8] + upload_date = stream_info.get('mvSvcOpenDt', '')[:8] or None return { 'id': video_id,