[ie/ondemandkorea] Overhaul extractor (#8386)

Closes #8374
Authored by: seproDev
This commit is contained in:
sepro 2023-11-11 20:57:56 +01:00 committed by GitHub
parent 3ff494f6f4
commit 05adfd883a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 142 additions and 59 deletions

View File

@ -1387,7 +1387,10 @@
from .oktoberfesttv import OktoberfestTVIE from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE from .olympics import OlympicsReplayIE
from .on24 import On24IE from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE from .ondemandkorea import (
OnDemandKoreaIE,
OnDemandKoreaProgramIE,
)
from .onefootball import OneFootballIE from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE from .oneplace import OnePlacePodcastIE

View File

@ -1,87 +1,167 @@
import functools
import re import re
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json, OnDemandPagedList,
float_or_none,
int_or_none,
join_nonempty,
parse_age_limit,
parse_qs,
unified_strdate,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class OnDemandKoreaIE(InfoExtractor):
    """Extractor for a single OnDemandKorea VOD item.

    Matches player URLs that carry a numeric ``contentId`` query parameter;
    that number is used as the video id.
    """

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
        'md5': 'e2ff77255d989e3135bde0c5889fbce8',
        'info_dict': {
            'id': '686471',
            'ext': 'mp4',
            'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 5486.955,
            'release_date': '20220924',
            'series': 'Ask Us Anything',
            'series_id': 11790,
            'episode_number': 351,
            'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
        'md5': '57266c720006962be7ff415b24775caa',
        'info_dict': {
            'id': '1595796',
            'ext': 'mp4',
            'title': 'Breakup Probation, A Week: E08',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'duration': 1586.0,
            'release_date': '20231001',
            'series': 'Breakup Probation, A Week',
            'series_id': 22912,
            'episode_number': 8,
            'episode': 'E08',
        },
    }, {
        'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
        'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
        'info_dict': {
            'id': '369531',
            'ext': 'mp4',
            'release_date': '20220519',
            'duration': 7267.0,
            'title': 'The Outlaws: Main Movie',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'age_limit': 18,
        },
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one VOD item from the playback API response.

        Raises ExtractorError (expected) when the response has no dict under
        ``result``; the message is taken from the response when one is present.
        """
        video_id = self._match_id(url)

        # A freshly generated UUID is sent as the `did` query parameter on every call.
        # 403 and 404 are declared as expected statuses and the download is non-fatal,
        # so the failure handling happens in the check right below.
        data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
            headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
        if not traverse_obj(data, ('result', {dict})):
            msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
            raise ExtractorError(msg or 'Got empty response from playback API', expected=True)

        data = data['result']

        def try_geo_bypass(url):
            # Prefer the URL embedded in the `stream_url` query parameter when it is
            # a valid URL; otherwise keep the URL as given.
            return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url

        def try_upgrade_quality(url):
            # Swap a `_720`/`_720p` manifest name for its `_1080` counterpart and keep
            # the swap only if a HEAD request for the new URL returns a response.
            mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
            return mod_url if mod_url != url and self._request_webpage(
                HEADRequest(mod_url), video_id, note='Checking for higher quality format',
                errnote='No higher quality format found', fatal=False) else url

        formats = []
        for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
            formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))

        subtitles = {}
        for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
            # `or` (rather than a .get() default) also covers an explicit null language,
            # which would otherwise become a None key in the subtitles dict.
            subtitles.setdefault(track.get('language') or 'und', []).append({
                'url': track['url'],
                'ext': track.get('codec'),
                'name': track.get('label'),
            })

        def if_series(key=None):
            # Yield obj[key] only for episodes whose `kind` is 'series'; None otherwise.
            return lambda obj: obj[key] if key and obj['kind'] == 'series' else None

        return {
            'id': video_id,
            'title': join_nonempty(
                ('episode', 'program', 'title'),
                ('episode', 'title'), from_dict=data, delim=': '),
            **traverse_obj(data, {
                'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
                'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
                'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
                'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
                'series': ('episode', {if_series(key='program')}, 'title'),
                'series_id': ('episode', {if_series(key='program')}, 'id'),
                'episode': ('episode', {if_series(key='title')}),
                'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
            }, get_all=False),
            'formats': formats,
            'subtitles': subtitles,
        }
class OnDemandKoreaProgramIE(InfoExtractor):
    """Playlist extractor for an OnDemandKorea program.

    Matches player URLs that end right after the program slug (no query
    string); the slug is used as the playlist id.
    """

    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Sent as `page_size` on every episodes request and given to the pager as its page size
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        """Yield one url_result per episode listed on the given page of a program.

        Entries without a usable ``id`` are skipped instead of raising, so one
        malformed entry does not abort the whole playlist.
        """
        # The index is shifted by one before it is sent to the API
        # (presumably the pager counts from 0 - confirm against OnDemandPagedList).
        page += 1
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query={
                'page': page,
                'page_size': self._PAGE_SIZE,
            }, note=f'Downloading page {page}', expected_status=404)
        # Filter inside traverse_obj (same pattern as the `text_tracks` lookup in
        # OnDemandKoreaIE) so only entries that actually carry an id are visited.
        for episode in traverse_obj(page_data, ('result', 'results', lambda _, v: v['id'])):
            yield self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))

    def _real_extract(self, url):
        """Return a lazily paged playlist of every episode of the program."""
        display_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, display_id), self._PAGE_SIZE)

        return self.playlist_result(entries, display_id)