0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-04 23:35:04 +00:00

[wrzuta:playlist] Improve and simplify (Closes #9341)

This commit is contained in:
Sergey M․ 2016-06-14 02:13:54 +07:00
parent fea55ef4a9
commit 1759672eed
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 27 additions and 28 deletions

View file

@@ -980,8 +980,10 @@
from .wimp import WimpIE from .wimp import WimpIE
from .wistia import WistiaIE from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE from .wrzuta import (
from .wrzuta import WrzutaPlaylistIE WrzutaIE,
WrzutaPlaylistIE,
)
from .wsj import WSJIE from .wsj import WSJIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE

View file

@@ -7,6 +7,7 @@
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
remove_start,
) )
@@ -82,10 +83,6 @@ def _real_extract(self, url):
} }
_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">'
_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>'
class WrzutaPlaylistIE(InfoExtractor): class WrzutaPlaylistIE(InfoExtractor):
""" """
this class covers extraction of wrzuta playlist entries this class covers extraction of wrzuta playlist entries
@@ -101,10 +98,7 @@ class WrzutaPlaylistIE(InfoExtractor):
""" """
IE_NAME = 'wrzuta.pl:playlist' IE_NAME = 'wrzuta.pl:playlist'
_VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)'
_VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \
'(?P<id>[0-9a-zA-Z]+)/.*'
_TESTS = [{ _TESTS = [{
'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza',
'playlist_mincount': 14, 'playlist_mincount': 14,
@@ -119,6 +113,9 @@ class WrzutaPlaylistIE(InfoExtractor):
'id': '6Nj3wQHx756', 'id': '6Nj3wQHx756',
'title': 'Lipiec - Lato 2015 Muzyka Świata', 'title': 'Lipiec - Lato 2015 Muzyka Świata',
}, },
}, {
'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -126,31 +123,31 @@ def _real_extract(self, url):
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
uploader = mobj.group('uploader') uploader = mobj.group('uploader')
entries = []
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') playlist_size = int_or_none(self._html_search_regex(
playlist_size = int(playlist_size) if playlist_size else 0 (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)',
r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'),
webpage, 'playlist size', default=None))
playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) playlist_title = remove_start(
self._og_search_title(webpage), 'Playlista: ')
entries = []
if playlist_size: if playlist_size:
entries = list(map( entries = [
lambda entry_url: self.url_result(entry_url), self.url_result(entry_url)
re.findall(_ENTRY_PATTERN, webpage) for _, entry_url in re.findall(
)) r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page',
webpage)]
if playlist_size > len(entries): if playlist_size > len(entries):
playlist_content = self._download_json( playlist_content = self._download_json(
'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id),
uploader_id=uploader,
playlist_id=playlist_id,
),
playlist_id, playlist_id,
'Downloading playlist content as JSON metadata', 'Downloading playlist JSON',
'Unable to download playlist content as JSON metadata', 'Unable to download playlist JSON')
) entries.extend([
entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] self.url_result(entry['filelink'])
for entry in playlist_content.get('files', []) if entry.get('filelink')])
return self.playlist_result(entries, playlist_id, playlist_title) return self.playlist_result(entries, playlist_id, playlist_title)