[ie/NoodleMagazine] Fix extraction (#7830)

Closes #7917
Authored by: RedDeffender
This commit is contained in:
RedDeffender 2023-08-31 01:26:45 +02:00 committed by GitHub
parent 099fb1b35c
commit bae4834245
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 21 additions and 10 deletions

View File

@@ -1,9 +1,14 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
parse_duration, extract_attributes,
get_element_html_by_id,
int_or_none,
parse_count, parse_count,
unified_strdate parse_duration,
unified_strdate,
urljoin,
) )
from ..utils.traversal import traverse_obj
class NoodleMagazineIE(InfoExtractor): class NoodleMagazineIE(InfoExtractor):
@@ -37,15 +42,21 @@ class NoodleMagazineIE(InfoExtractor):
like_count = parse_count(self._html_search_meta('ya:ovs:likes', webpage, default=None)) like_count = parse_count(self._html_search_meta('ya:ovs:likes', webpage, default=None))
upload_date = unified_strdate(self._html_search_meta('ya:ovs:upload_date', webpage, default='')) upload_date = unified_strdate(self._html_search_meta('ya:ovs:upload_date', webpage, default=''))
key = self._html_search_regex(rf'/{video_id}\?(?:.*&)?m=([^&"\'\s,]+)', webpage, 'key') player_path = extract_attributes(get_element_html_by_id('iplayer', webpage) or '')['src']
playlist_info = self._download_json(f'https://adult.noodlemagazine.com/playlist/{video_id}?m={key}', video_id) player_iframe = self._download_webpage(
thumbnail = self._og_search_property('image', webpage, default=None) or playlist_info.get('image') urljoin('https://adult.noodlemagazine.com', player_path), video_id, 'Downloading iframe page')
playlist_url = self._search_regex(
r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
playlist_info = self._download_json(
urljoin('https://adult.noodlemagazine.com', playlist_url), video_id, headers={'Referer': url})
formats = [{ thumbnail = self._og_search_property('image', webpage, default=None) or playlist_info.get('image')
'url': source.get('file'), formats = traverse_obj(playlist_info, ('sources', lambda _, v: v['file'], {
'quality': source.get('label'), 'url': 'file',
'ext': source.get('type'), 'format_id': 'label',
} for source in playlist_info.get('sources')] 'height': ('label', {int_or_none}),
'ext': 'type',
}))
return { return {
'id': video_id, 'id': video_id,