0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-12-22 06:00:00 +00:00

[extractor/zingmp3] Fix and improve extractors (#6367)

Authored by: hatienl0i261299
This commit is contained in:
Ha Tien Loi 2023-05-29 12:02:16 +07:00 committed by GitHub
parent bfdf144c7e
commit 17d7ca84ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 30 deletions

View file

@ -2483,6 +2483,7 @@
ZingMp3WeekChartIE, ZingMp3WeekChartIE,
ZingMp3ChartMusicVideoIE, ZingMp3ChartMusicVideoIE,
ZingMp3UserIE, ZingMp3UserIE,
ZingMp3HubIE,
) )
from .zoom import ZoomIE from .zoom import ZoomIE
from .zype import ZypeIE from .zype import ZypeIE

View file

@ -1,16 +1,11 @@
import functools
import hashlib import hashlib
import hmac import hmac
import itertools
import json import json
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import int_or_none, traverse_obj, try_call, urljoin
OnDemandPagedList,
int_or_none,
traverse_obj,
urljoin,
)
class ZingMp3BaseIE(InfoExtractor): class ZingMp3BaseIE(InfoExtractor):
@ -37,6 +32,7 @@ class ZingMp3BaseIE(InfoExtractor):
'info-artist': '/api/v2/page/get/artist', 'info-artist': '/api/v2/page/get/artist',
'user-list-song': '/api/v2/song/get/list', 'user-list-song': '/api/v2/song/get/list',
'user-list-video': '/api/v2/video/get/list', 'user-list-video': '/api/v2/video/get/list',
'hub': '/api/v2/page/get/hub-detail',
} }
def _api_url(self, url_type, params): def _api_url(self, url_type, params):
@ -46,9 +42,9 @@ def _api_url(self, url_type, params):
''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest() ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
data = { data = {
**params, **params,
'apiKey': '88265e23d4284f25963e6eedac8fbfa3', 'apiKey': 'X5BM3w8N7MKozC0B85o4KMlzLZKhV00y',
'sig': hmac.new( 'sig': hmac.new(b'acOrvUS15XRW2o9JksiK1KgQ6Vbds8ZW',
b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(), f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
} }
return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}' return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}'
@ -67,6 +63,19 @@ def _parse_items(self, items):
for url in traverse_obj(items, (..., 'link')) or []: for url in traverse_obj(items, (..., 'link')) or []:
yield self.url_result(urljoin(self._DOMAIN, url)) yield self.url_result(urljoin(self._DOMAIN, url))
def _fetch_page(self, id_, url_type, page):
raise NotImplementedError('This method must be implemented by subclasses')
def _paged_list(self, _id, url_type):
    """Lazily yield the playlist entries of every result page for `_id`.

    Pages are requested one at a time through `self._fetch_page`, starting
    at page 1, and the `items` of each response are converted to entries
    with `self._parse_items`.

    Iteration stops as soon as one of the following holds for a response:
      * it carries no `items`,
      * it does not set `hasMore`,
      * more entries were yielded so far than its `total` reports.
    """
    count = 0
    for page in itertools.count(1):
        data = self._fetch_page(_id, url_type, page)
        items = data.get('items')
        entries = list(self._parse_items(items))
        count += len(entries)
        yield from entries
        # A page without items means there is nothing further to read.
        # Without this guard, a response that keeps reporting `hasMore`
        # (and has no usable `total`) would be requested forever.
        if not items or not data.get('hasMore'):
            break
        # NOTE(review): try_call presumably swallows the lookup/comparison
        # error when `total` is missing or not comparable - confirm.
        if try_call(lambda: count > data['total']):
            break
class ZingMp3IE(ZingMp3BaseIE): class ZingMp3IE(ZingMp3BaseIE):
_VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed'
@ -166,8 +175,11 @@ def _real_extract(self, url):
'height': int_or_none(res), 'height': int_or_none(res),
}) })
if not formats and item.get('msg') == 'Sorry, this content is not available in your country.': if not formats:
if item.get('msg') == 'Sorry, this content is not available in your country.':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
else:
self.raise_no_formats('The song is only for VIP accounts.')
lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file') lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file')
@ -200,7 +212,7 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
'id': 'ZWZAEZZD', 'id': 'ZWZAEZZD',
'title': 'Những Bài Hát Hay Nhất Của Mr. Siro', 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
}, },
'playlist_mincount': 49, 'playlist_mincount': 20,
}, { }, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True, 'only_matching': True,
@ -305,22 +317,20 @@ class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE):
'id': 'IWZ9Z086', 'id': 'IWZ9Z086',
'title': 'the-loai-video_Khong-Loi', 'title': 'the-loai-video_Khong-Loi',
}, },
'playlist_mincount': 10, 'playlist_mincount': 1,
}] }]
def _fetch_page(self, song_id, url_type, page): def _fetch_page(self, song_id, url_type, page):
return self._parse_items(self._call_api(url_type, { return self._call_api(url_type, {
'id': song_id, 'id': song_id,
'type': 'genre', 'type': 'genre',
'page': page + 1, 'page': page,
'count': self._PER_PAGE 'count': self._PER_PAGE
}).get('items')) })
def _real_extract(self, url): def _real_extract(self, url):
song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type') song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type')
return self.playlist_result( return self.playlist_result(self._paged_list(song_id, url_type), song_id, f'{url_type}_{regions}')
OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE),
song_id, f'{url_type}_{regions}')
class ZingMp3UserIE(ZingMp3BaseIE): class ZingMp3UserIE(ZingMp3BaseIE):
@ -331,7 +341,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
'info_dict': { 'info_dict': {
'id': 'IWZ98609', 'id': 'IWZ98609',
'title': 'Mr. Siro - bai-hat', 'title': 'Mr. Siro - bai-hat',
'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
}, },
'playlist_mincount': 91, 'playlist_mincount': 91,
}, { }, {
@ -339,7 +349,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
'info_dict': { 'info_dict': {
'id': 'IWZ98609', 'id': 'IWZ98609',
'title': 'Mr. Siro - album', 'title': 'Mr. Siro - album',
'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
@ -347,7 +357,7 @@ class ZingMp3UserIE(ZingMp3BaseIE):
'info_dict': { 'info_dict': {
'id': 'IWZ98609', 'id': 'IWZ98609',
'title': 'Mr. Siro - single', 'title': 'Mr. Siro - single',
'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
}, },
'playlist_mincount': 20, 'playlist_mincount': 20,
}, { }, {
@ -355,19 +365,19 @@ class ZingMp3UserIE(ZingMp3BaseIE):
'info_dict': { 'info_dict': {
'id': 'IWZ98609', 'id': 'IWZ98609',
'title': 'Mr. Siro - video', 'title': 'Mr. Siro - video',
'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', 'description': 'md5:5bdcf45e955dc1b8d7f518f322ffef36',
}, },
'playlist_mincount': 15, 'playlist_mincount': 15,
}] }]
def _fetch_page(self, user_id, url_type, page): def _fetch_page(self, user_id, url_type, page):
url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video' url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video'
return self._parse_items(self._call_api(url_type, { return self._call_api(url_type, {
'id': user_id, 'id': user_id,
'type': 'artist', 'type': 'artist',
'page': page + 1, 'page': page,
'count': self._PER_PAGE 'count': self._PER_PAGE
}, query={'sort': 'new', 'sectionId': 'aSong'}).get('items')) })
def _real_extract(self, url): def _real_extract(self, url):
user_alias, url_type = self._match_valid_url(url).group('user', 'type') user_alias, url_type = self._match_valid_url(url).group('user', 'type')
@ -376,10 +386,41 @@ def _real_extract(self, url):
user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias}) user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias})
if url_type in ('bai-hat', 'video'): if url_type in ('bai-hat', 'video'):
entries = OnDemandPagedList( entries = self._paged_list(user_info['id'], url_type)
functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE)
else: else:
entries = self._parse_items(traverse_obj(user_info, ( entries = self._parse_items(traverse_obj(user_info, (
'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...))) 'sections',
lambda _, v: v['sectionId'] == 'aAlbum' if url_type == 'album' else v['sectionId'] == 'aSingle',
'items', ...)))
return self.playlist_result( return self.playlist_result(
entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography')) entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography'))
class ZingMp3HubIE(ZingMp3BaseIE):
    """Extract a ZingMp3 hub page as a playlist of the items it lists."""

    IE_NAME = 'zingmp3:hub'
    _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>hub)/(?P<regions>[^/]+)/(?P<id>[^\.]+)'
    _TESTS = [{
        'url': 'https://zingmp3.vn/hub/Nhac-Moi/IWZ9Z0CA.html',
        'info_dict': {
            'id': 'IWZ9Z0CA',
            'title': 'Nhạc Mới',
            'description': 'md5:1cc31b68a6f746427b07b2756c22a558',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://zingmp3.vn/hub/Nhac-Viet/IWZ9Z087.html',
        'info_dict': {
            'id': 'IWZ9Z087',
            'title': 'Nhạc Việt',
            'description': 'md5:acc976c8bdde64d5c6ee4a92c39f7a77',
        },
        'playlist_mincount': 30,
    }]

    def _real_extract(self, url):
        """Build the playlist for the hub page at `url`.

        The hub detail is fetched through `self._call_api`; the playlist
        entries come from the items of every section whose `sectionId` is
        'hub', and the playlist title/description from the hub detail itself.
        """
        # The `regions` slug of the URL is matched by _VALID_URL but is not
        # needed for the request, so only the id and type are read.
        # `type` can only ever be 'hub' for URLs this extractor matches.
        song_id, url_type = self._match_valid_url(url).group('id', 'type')
        hub_detail = self._call_api(url_type, {'id': song_id})
        entries = self._parse_items(traverse_obj(hub_detail, (
            'sections', lambda _, v: v['sectionId'] == 'hub', 'items', ...)))
        return self.playlist_result(
            entries, song_id, hub_detail.get('title'), hub_detail.get('description'))