mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-22 02:15:12 +00:00
[Motherless] Fix extractor (#809)
Authored-by: coletdjnz. Fixes #806 and https://github.com/ytdl-org/youtube-dl/issues/29626.
This commit is contained in:
parent
54153fb71b
commit
419508eabb
1 changed file with 24 additions and 6 deletions
|
@ -127,9 +127,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
comment_count = webpage.count('class="media-comment-contents"')
|
comment_count = webpage.count('class="media-comment-contents"')
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
|
(r'"media-meta-member">\s+<a href="/m/([^"]+)"',
|
||||||
webpage, 'uploader_id')
|
r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
|
||||||
|
webpage, 'uploader_id', fatal=False)
|
||||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||||
if categories:
|
if categories:
|
||||||
categories = [cat.strip() for cat in categories.split(',')]
|
categories = [cat.strip() for cat in categories.split(',')]
|
||||||
|
@ -169,7 +169,18 @@ class MotherlessGroupIE(InfoExtractor):
|
||||||
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
|
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
|
||||||
'any kind!'
|
'any kind!'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 0,
|
||||||
|
'expected_warnings': [
|
||||||
|
'This group has no videos.',
|
||||||
|
]
|
||||||
|
}, {
|
||||||
|
'url': 'https://motherless.com/g/beautiful_cock',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'beautiful_cock',
|
||||||
|
'title': 'Beautiful Cock',
|
||||||
|
'description': 'Group for lovely cocks yours, mine, a friends anything human',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2500,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -209,11 +220,18 @@ def _real_extract(self, url):
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'description', webpage, fatal=False)
|
'description', webpage, fatal=False)
|
||||||
page_count = self._int(self._search_regex(
|
page_count = self._int(self._search_regex(
|
||||||
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
|
r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
|
||||||
webpage, 'page_count'), 'page_count')
|
webpage, 'page_count', default=0), 'page_count')
|
||||||
|
if not page_count:
|
||||||
|
message = self._search_regex(
|
||||||
|
r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
|
||||||
|
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
||||||
|
self.report_warning(message, group_id)
|
||||||
PAGE_SIZE = 80
|
PAGE_SIZE = 80
|
||||||
|
|
||||||
def _get_page(idx):
|
def _get_page(idx):
|
||||||
|
if not page_count:
|
||||||
|
return
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
page_url, group_id, query={'page': idx + 1},
|
page_url, group_id, query={'page': idx + 1},
|
||||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||||
|
|
Loading…
Reference in a new issue