mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
[ie/nebula] Overhaul extractors (#8566)
Closes #4300, Closes #5814, Closes #7588, Closes #6334, Closes #6538
Authored by: elyse0, pukkandan, seproDev
Co-authored-by: Elyse <26639800+elyse0@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
This commit is contained in:
parent
3237f8ba29
commit
45d82be65f
3 changed files with 336 additions and 232 deletions
|
@ -214,8 +214,9 @@ def sanitize(key, value):
|
||||||
|
|
||||||
test_info_dict = {
|
test_info_dict = {
|
||||||
key: sanitize(key, value) for key, value in got_dict.items()
|
key: sanitize(key, value) for key, value in got_dict.items()
|
||||||
if value is not None and key not in IGNORED_FIELDS and not any(
|
if value is not None and key not in IGNORED_FIELDS and (
|
||||||
key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
|
not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
|
||||||
|
or key == '_old_archive_ids')
|
||||||
}
|
}
|
||||||
|
|
||||||
# display_id may be generated from id
|
# display_id may be generated from id
|
||||||
|
|
|
@ -1247,6 +1247,7 @@
|
||||||
from .ndtv import NDTVIE
|
from .ndtv import NDTVIE
|
||||||
from .nebula import (
|
from .nebula import (
|
||||||
NebulaIE,
|
NebulaIE,
|
||||||
|
NebulaClassIE,
|
||||||
NebulaSubscriptionsIE,
|
NebulaSubscriptionsIE,
|
||||||
NebulaChannelIE,
|
NebulaChannelIE,
|
||||||
)
|
)
|
||||||
|
|
|
@ -3,230 +3,306 @@
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
make_archive_id,
|
||||||
|
parse_iso8601,
|
||||||
|
smuggle_url,
|
||||||
|
try_call,
|
||||||
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
||||||
|
|
||||||
|
|
||||||
class NebulaBaseIE(InfoExtractor):
|
class NebulaBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'watchnebula'
|
_NETRC_MACHINE = 'watchnebula'
|
||||||
|
_token = _api_token = None
|
||||||
|
|
||||||
_nebula_api_token = None
|
def _perform_login(self, username, password):
|
||||||
_nebula_bearer_token = None
|
|
||||||
|
|
||||||
def _perform_nebula_auth(self, username, password):
|
|
||||||
if not username or not password:
|
|
||||||
self.raise_login_required(method='password')
|
|
||||||
|
|
||||||
data = json.dumps({'email': username, 'password': password}).encode('utf8')
|
|
||||||
response = self._download_json(
|
|
||||||
'https://api.watchnebula.com/api/v1/auth/login/',
|
|
||||||
data=data, fatal=False, video_id=None,
|
|
||||||
headers={
|
|
||||||
'content-type': 'application/json',
|
|
||||||
# Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
|
|
||||||
'cookie': ''
|
|
||||||
},
|
|
||||||
note='Logging in to Nebula with supplied credentials',
|
|
||||||
errnote='Authentication failed or rejected')
|
|
||||||
if not response or not response.get('key'):
|
|
||||||
self.raise_login_required(method='password')
|
|
||||||
|
|
||||||
return response['key']
|
|
||||||
|
|
||||||
def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
|
|
||||||
assert method in ('GET', 'POST',)
|
|
||||||
assert auth_type in ('api', 'bearer',)
|
|
||||||
|
|
||||||
def inner_call():
|
|
||||||
authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
|
|
||||||
return self._download_json(
|
|
||||||
url, video_id, note=note, headers={'Authorization': authorization},
|
|
||||||
data=b'' if method == 'POST' else None)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return inner_call()
|
response = self._download_json(
|
||||||
except ExtractorError as exc:
|
'https://nebula.tv/auth/login/', None,
|
||||||
# if 401 or 403, attempt credential re-auth and retry
|
'Logging in to Nebula', 'Login failed',
|
||||||
if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.status in (401, 403):
|
data=json.dumps({'email': username, 'password': password}).encode(),
|
||||||
self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
|
headers={'content-type': 'application/json'})
|
||||||
self._perform_login()
|
except ExtractorError as e:
|
||||||
return inner_call()
|
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||||
else:
|
raise ExtractorError('Login failed: Invalid username or password', expected=True)
|
||||||
|
raise
|
||||||
|
self._api_token = traverse_obj(response, ('key', {str}))
|
||||||
|
if not self._api_token:
|
||||||
|
raise ExtractorError('Login failed: No token')
|
||||||
|
|
||||||
|
def _call_api(self, *args, **kwargs):
    """Call ``self._download_json`` with the Nebula bearer token attached.

    All positional and keyword arguments are forwarded to ``_download_json``.
    When ``self._token`` is set, an ``Authorization: Bearer <token>`` header is
    merged into a copy of the caller's ``headers``; the caller's dict is never
    mutated and ``headers=None`` is accepted.

    If the request fails with HTTP 401 or 403, ``_real_initialize()`` is re-run
    and the request is retried once. Any other error, or a failure of the
    retry itself, propagates to the caller.
    """
    def authorized(options):
        # Without a token the request goes out exactly as the caller specified it
        if not self._token:
            return options
        headers = {**(options.get('headers') or {}), 'Authorization': f'Bearer {self._token}'}
        return {**options, 'headers': headers}

    try:
        return self._download_json(*args, **authorized(kwargs))
    except ExtractorError as e:
        if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
            raise
        self.to_screen(
            f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
        self._real_initialize()
        # Rebuild the headers so the retry carries whatever token is current now
        return self._download_json(*args, **authorized(kwargs))
|
||||||
|
|
||||||
|
def _real_initialize(self):
    """Obtain the bearer token used for content API calls and store it in ``self._token``.

    If no API token is known yet, it is looked up in the ``nebula_auth.apiToken``
    cookie for https://nebula.tv. The authorization request is sent with a
    ``Token`` header when an API token is available and without one otherwise.
    """
    if not self._api_token:
        # try_call() yields None when the cookie lookup fails
        self._api_token = try_call(
            lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)

    auth_headers = None
    if self._api_token:
        auth_headers = {'Authorization': f'Token {self._api_token}'}

    authorization = self._download_json(
        'https://users.api.nebula.app/api/v1/authorization/', None,
        headers=auth_headers, note='Authorizing to Nebula', data=b'')
    self._token = authorization['token']
|
||||||
|
|
||||||
|
def _extract_formats(self, content_id, slug):
    """Fetch the HLS manifest for ``content_id`` and return its formats and subtitles.

    Returns a dict with ``formats`` and ``subtitles`` keys. An HTTP 401 is
    reported through ``raise_login_required()``. The first HTTP 403 triggers
    ``_real_initialize()`` and a single retry; every other failure is re-raised.
    """
    # The path segment before the id is the id's own prefix (text before ':') plus 's'
    manifest_url = f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8'

    for retry in (False, True):
        try:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                manifest_url, slug, 'mp4', query={
                    'token': self._token,
                    'app_version': '23.10.0',
                    'platform': 'ios',
                })
            return {'formats': fmts, 'subtitles': subs}
        except ExtractorError as e:
            status = e.cause.status if isinstance(e.cause, HTTPError) else None
            if status == 401:
                self.raise_login_required()
            if status == 403 and not retry:
                self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                self._real_initialize()
                continue
            raise
|
||||||
|
|
||||||
def _fetch_nebula_bearer_token(self):
|
def _extract_video_metadata(self, episode):
|
||||||
"""
|
channel_url = traverse_obj(
|
||||||
Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
|
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
|
||||||
"""
|
|
||||||
response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
|
|
||||||
method='POST',
|
|
||||||
note='Authorizing to Nebula')
|
|
||||||
return response['token']
|
|
||||||
|
|
||||||
def _fetch_video_formats(self, slug):
|
|
||||||
stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
|
|
||||||
video_id=slug,
|
|
||||||
auth_type='bearer',
|
|
||||||
note='Fetching video stream info')
|
|
||||||
manifest_url = stream_info['manifest']
|
|
||||||
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
|
|
||||||
|
|
||||||
def _build_video_info(self, episode):
|
|
||||||
fmts, subs = self._fetch_video_formats(episode['slug'])
|
|
||||||
channel_slug = episode['channel_slug']
|
|
||||||
channel_title = episode['channel_title']
|
|
||||||
zype_id = episode.get('zype_id')
|
|
||||||
return {
|
return {
|
||||||
'id': remove_start(episode['id'], 'video_episode:'),
|
'id': episode['id'].partition(':')[2],
|
||||||
'display_id': episode['slug'],
|
**traverse_obj(episode, {
|
||||||
'formats': fmts,
|
'display_id': 'slug',
|
||||||
'subtitles': subs,
|
'title': 'title',
|
||||||
'webpage_url': f'https://nebula.tv/{episode["slug"]}',
|
'description': 'description',
|
||||||
'title': episode['title'],
|
'timestamp': ('published_at', {parse_iso8601}),
|
||||||
'description': episode['description'],
|
'duration': ('duration', {int_or_none}),
|
||||||
'timestamp': parse_iso8601(episode['published_at']),
|
'channel_id': 'channel_slug',
|
||||||
'thumbnails': [{
|
'uploader_id': 'channel_slug',
|
||||||
# 'id': tn.get('name'), # this appears to be null
|
'channel': 'channel_title',
|
||||||
'url': tn['original'],
|
'uploader': 'channel_title',
|
||||||
'height': key,
|
'series': 'channel_title',
|
||||||
} for key, tn in episode['assets']['thumbnail'].items()],
|
'creator': 'channel_title',
|
||||||
'duration': episode['duration'],
|
'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
|
||||||
'channel': channel_title,
|
'episode_number': ('order', {int_or_none}),
|
||||||
'channel_id': channel_slug,
|
# Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
|
||||||
'channel_url': f'https://nebula.tv/{channel_slug}',
|
'_old_archive_ids': ('zype_id', {lambda x: [
|
||||||
'uploader': channel_title,
|
make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
|
||||||
'uploader_id': channel_slug,
|
}),
|
||||||
'uploader_url': f'https://nebula.tv/{channel_slug}',
|
'channel_url': channel_url,
|
||||||
'series': channel_title,
|
'uploader_url': channel_url,
|
||||||
'creator': channel_title,
|
|
||||||
'extractor_key': NebulaIE.ie_key(),
|
|
||||||
'extractor': NebulaIE.IE_NAME,
|
|
||||||
'_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _perform_login(self, username=None, password=None):
|
|
||||||
self._nebula_api_token = self._perform_nebula_auth(username, password)
|
|
||||||
self._nebula_bearer_token = self._fetch_nebula_bearer_token()
|
|
||||||
|
|
||||||
|
|
||||||
class NebulaIE(NebulaBaseIE):
|
class NebulaIE(NebulaBaseIE):
|
||||||
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
|
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
|
||||||
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
|
'info_dict': {
|
||||||
'md5': '14944cfee8c7beeea106320c47560efc',
|
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
|
||||||
'info_dict': {
|
'ext': 'mp4',
|
||||||
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
|
'title': 'That Time Disney Remade Beauty and the Beast',
|
||||||
'ext': 'mp4',
|
'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
|
||||||
'title': 'That Time Disney Remade Beauty and the Beast',
|
'upload_date': '20180731',
|
||||||
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
|
'timestamp': 1533009600,
|
||||||
'upload_date': '20180731',
|
'channel': 'Lindsay Ellis',
|
||||||
'timestamp': 1533009600,
|
'channel_id': 'lindsayellis',
|
||||||
'channel': 'Lindsay Ellis',
|
'uploader': 'Lindsay Ellis',
|
||||||
'channel_id': 'lindsayellis',
|
'uploader_id': 'lindsayellis',
|
||||||
'uploader': 'Lindsay Ellis',
|
'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
|
||||||
'uploader_id': 'lindsayellis',
|
'series': 'Lindsay Ellis',
|
||||||
'uploader_url': 'https://nebula.tv/lindsayellis',
|
'display_id': 'that-time-disney-remade-beauty-and-the-beast',
|
||||||
'series': 'Lindsay Ellis',
|
'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
|
||||||
'display_id': 'that-time-disney-remade-beauty-and-the-beast',
|
'creator': 'Lindsay Ellis',
|
||||||
'channel_url': 'https://nebula.tv/lindsayellis',
|
'duration': 2212,
|
||||||
'creator': 'Lindsay Ellis',
|
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
|
||||||
'duration': 2212,
|
'_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
|
||||||
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
'params': {'skip_download': 'm3u8'},
|
||||||
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
}, {
|
||||||
'md5': 'd05739cf6c38c09322422f696b569c23',
|
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||||
'info_dict': {
|
'md5': 'd05739cf6c38c09322422f696b569c23',
|
||||||
'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
|
||||||
'title': 'Landing Craft - How The Allies Got Ashore',
|
'ext': 'mp4',
|
||||||
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
|
'title': 'Landing Craft - How The Allies Got Ashore',
|
||||||
'upload_date': '20200327',
|
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
|
||||||
'timestamp': 1585348140,
|
'upload_date': '20200327',
|
||||||
'channel': 'Real Engineering — The Logistics of D-Day',
|
'timestamp': 1585348140,
|
||||||
'channel_id': 'd-day',
|
'channel': 'Real Engineering — The Logistics of D-Day',
|
||||||
'uploader': 'Real Engineering — The Logistics of D-Day',
|
'channel_id': 'd-day',
|
||||||
'uploader_id': 'd-day',
|
'uploader': 'Real Engineering — The Logistics of D-Day',
|
||||||
'series': 'Real Engineering — The Logistics of D-Day',
|
'uploader_id': 'd-day',
|
||||||
'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
'series': 'Real Engineering — The Logistics of D-Day',
|
||||||
'creator': 'Real Engineering — The Logistics of D-Day',
|
'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
|
||||||
'duration': 841,
|
'creator': 'Real Engineering — The Logistics of D-Day',
|
||||||
'channel_url': 'https://nebula.tv/d-day',
|
'duration': 841,
|
||||||
'uploader_url': 'https://nebula.tv/d-day',
|
'channel_url': 'https://nebula.tv/d-day',
|
||||||
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
|
'uploader_url': 'https://nebula.tv/d-day',
|
||||||
},
|
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
|
||||||
|
'_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
|
||||||
},
|
},
|
||||||
{
|
'params': {'skip_download': 'm3u8'},
|
||||||
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
|
}, {
|
||||||
'md5': 'ebe28a7ad822b9ee172387d860487868',
|
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
|
||||||
'info_dict': {
|
'md5': 'ebe28a7ad822b9ee172387d860487868',
|
||||||
'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
|
||||||
'title': 'Episode 1: The Draw',
|
'ext': 'mp4',
|
||||||
'description': r'contains:There’s free money on offer… if the players can all work together.',
|
'title': 'Episode 1: The Draw',
|
||||||
'upload_date': '20200323',
|
'description': r'contains:There’s free money on offer… if the players can all work together.',
|
||||||
'timestamp': 1584980400,
|
'upload_date': '20200323',
|
||||||
'channel': 'Tom Scott Presents: Money',
|
'timestamp': 1584980400,
|
||||||
'channel_id': 'tom-scott-presents-money',
|
'channel': 'Tom Scott Presents: Money',
|
||||||
'uploader': 'Tom Scott Presents: Money',
|
'channel_id': 'tom-scott-presents-money',
|
||||||
'uploader_id': 'tom-scott-presents-money',
|
'uploader': 'Tom Scott Presents: Money',
|
||||||
'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
|
'uploader_id': 'tom-scott-presents-money',
|
||||||
'duration': 825,
|
'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
|
||||||
'channel_url': 'https://nebula.tv/tom-scott-presents-money',
|
'duration': 825,
|
||||||
'series': 'Tom Scott Presents: Money',
|
'channel_url': 'https://nebula.tv/tom-scott-presents-money',
|
||||||
'display_id': 'money-episode-1-the-draw',
|
'series': 'Tom Scott Presents: Money',
|
||||||
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
|
'display_id': 'money-episode-1-the-draw',
|
||||||
'creator': 'Tom Scott Presents: Money',
|
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
|
||||||
},
|
'creator': 'Tom Scott Presents: Money',
|
||||||
|
'_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
|
||||||
},
|
},
|
||||||
{
|
'params': {'skip_download': 'm3u8'},
|
||||||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
}, {
|
||||||
'only_matching': True,
|
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
|
||||||
|
'title': 'Did the US Really Blow Up the NordStream Pipelines?',
|
||||||
|
'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
|
||||||
|
'upload_date': '20230223',
|
||||||
|
'timestamp': 1677144070,
|
||||||
|
'channel': 'TLDR News EU',
|
||||||
|
'channel_id': 'tldrnewseu',
|
||||||
|
'uploader': 'TLDR News EU',
|
||||||
|
'uploader_id': 'tldrnewseu',
|
||||||
|
'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
|
||||||
|
'duration': 524,
|
||||||
|
'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
|
||||||
|
'series': 'TLDR News EU',
|
||||||
|
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
|
||||||
|
'creator': 'TLDR News EU',
|
||||||
|
'_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
|
||||||
},
|
},
|
||||||
{
|
'params': {'skip_download': 'm3u8'},
|
||||||
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
|
}, {
|
||||||
'only_matching': True,
|
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
|
||||||
},
|
'only_matching': True,
|
||||||
]
|
}]
|
||||||
|
|
||||||
def _fetch_video_metadata(self, slug):
|
|
||||||
return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
|
|
||||||
video_id=slug,
|
|
||||||
auth_type='bearer',
|
|
||||||
note='Fetching video meta data')
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug = self._match_id(url)
|
slug = self._match_id(url)
|
||||||
video = self._fetch_video_metadata(slug)
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
return self._build_video_info(video)
|
if smuggled_data.get('id'):
|
||||||
|
return {
|
||||||
|
'id': smuggled_data['id'],
|
||||||
|
'display_id': slug,
|
||||||
|
'title': '',
|
||||||
|
**self._extract_formats(smuggled_data['id'], slug),
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata = self._call_api(
|
||||||
|
f'https://content.api.nebula.app/content/videos/{slug}',
|
||||||
|
slug, note='Fetching video metadata')
|
||||||
|
return {
|
||||||
|
**self._extract_video_metadata(metadata),
|
||||||
|
**self._extract_formats(metadata['id'], slug),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NebulaClassIE(NebulaBaseIE):
    # Handles URLs of the form /<class-slug>/<episode-number>
    IE_NAME = 'nebula:class'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'ext': 'mp4',
            'display_id': '14',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'duration': 646,
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Extract one class lesson, or only its formats when the URL carries a smuggled content id."""
        mobj = self._match_valid_url(url)
        slug, episode = mobj.group('id'), mobj.group('ep')

        url, smuggled_data = unsmuggle_url(url, {})
        content_id = smuggled_data.get('id')
        if content_id:
            # A smuggled id means no metadata request is made; only the formats are fetched
            return {
                'id': content_id,
                'display_id': slug,
                'title': '',
                **self._extract_formats(content_id, slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching video metadata')
        info = dict(self._extract_video_metadata(metadata))
        info.update(self._extract_formats(metadata['id'], slug))
        return info
|
||||||
|
|
||||||
|
|
||||||
class NebulaSubscriptionsIE(NebulaBaseIE):
|
class NebulaSubscriptionsIE(NebulaBaseIE):
|
||||||
IE_NAME = 'nebula:subscriptions'
|
IE_NAME = 'nebula:subscriptions'
|
||||||
_VALID_URL = rf'{_BASE_URL_RE}/myshows'
|
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'https://nebula.tv/myshows',
|
||||||
'url': 'https://nebula.tv/myshows',
|
'playlist_mincount': 1,
|
||||||
'playlist_mincount': 1,
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'myshows',
|
||||||
'id': 'myshows',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
}]
|
||||||
|
|
||||||
def _generate_playlist_entries(self):
|
def _generate_playlist_entries(self):
|
||||||
next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
|
next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
|
||||||
page_num = 1
|
'following': 'true',
|
||||||
while next_url:
|
'include': 'engagement',
|
||||||
channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
|
'ordering': '-published_at',
|
||||||
note=f'Retrieving subscriptions page {page_num}')
|
})
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
channel = self._call_api(
|
||||||
|
next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
|
||||||
for episode in channel['results']:
|
for episode in channel['results']:
|
||||||
yield self._build_video_info(episode)
|
metadata = self._extract_video_metadata(episode)
|
||||||
next_url = channel['next']
|
yield self.url_result(smuggle_url(
|
||||||
page_num += 1
|
f'https://nebula.tv/videos/{metadata["display_id"]}',
|
||||||
|
{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
|
||||||
|
next_url = channel.get('next')
|
||||||
|
if not next_url:
|
||||||
|
return
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
return self.playlist_result(self._generate_playlist_entries(), 'myshows')
|
return self.playlist_result(self._generate_playlist_entries(), 'myshows')
|
||||||
|
@ -234,48 +310,74 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class NebulaChannelIE(NebulaBaseIE):
|
class NebulaChannelIE(NebulaBaseIE):
|
||||||
IE_NAME = 'nebula:channel'
|
IE_NAME = 'nebula:channel'
|
||||||
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
|
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
|
||||||
_TESTS = [
|
_TESTS = [{
|
||||||
{
|
'url': 'https://nebula.tv/tom-scott-presents-money',
|
||||||
'url': 'https://nebula.tv/tom-scott-presents-money',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': 'tom-scott-presents-money',
|
||||||
'id': 'tom-scott-presents-money',
|
'title': 'Tom Scott Presents: Money',
|
||||||
'title': 'Tom Scott Presents: Money',
|
'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
|
||||||
'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
|
|
||||||
},
|
|
||||||
'playlist_count': 5,
|
|
||||||
}, {
|
|
||||||
'url': 'https://nebula.tv/lindsayellis',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lindsayellis',
|
|
||||||
'title': 'Lindsay Ellis',
|
|
||||||
'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 2,
|
|
||||||
},
|
},
|
||||||
]
|
'playlist_count': 5,
|
||||||
|
}, {
|
||||||
|
'url': 'https://nebula.tv/lindsayellis',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lindsayellis',
|
||||||
|
'title': 'Lindsay Ellis',
|
||||||
|
'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://nebula.tv/johnnyharris',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'johnnyharris',
|
||||||
|
'title': 'Johnny Harris',
|
||||||
|
'description': 'I make videos about maps and many other things.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 90,
|
||||||
|
}, {
|
||||||
|
'url': 'https://nebula.tv/copyright-for-fun-and-profit',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'copyright-for-fun-and-profit',
|
||||||
|
'title': 'Copyright for Fun and Profit',
|
||||||
|
'description': 'md5:6690248223eed044a9f11cd5a24f9742',
|
||||||
|
},
|
||||||
|
'playlist_count': 23,
|
||||||
|
}]
|
||||||
|
|
||||||
def _generate_playlist_entries(self, collection_id, channel):
|
def _generate_playlist_entries(self, collection_id, collection_slug):
|
||||||
episodes = channel['episodes']['results']
|
next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
|
||||||
for page_num in itertools.count(2):
|
for page_num in itertools.count(1):
|
||||||
for episode in episodes:
|
episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
|
||||||
yield self._build_video_info(episode)
|
for episode in episodes['results']:
|
||||||
next_url = channel['episodes']['next']
|
metadata = self._extract_video_metadata(episode)
|
||||||
|
yield self.url_result(smuggle_url(
|
||||||
|
episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
|
||||||
|
{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
|
||||||
|
next_url = episodes.get('next')
|
||||||
if not next_url:
|
if not next_url:
|
||||||
break
|
break
|
||||||
channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
|
|
||||||
note=f'Retrieving channel page {page_num}')
|
def _generate_class_entries(self, channel):
    """Yield one ``url_transparent`` result per entry of ``channel['lessons']``.

    Each result points at the lesson's ``share_url`` when present, otherwise at
    ``https://nebula.tv/<class_slug>/<slug>``. The lesson id is smuggled into
    the URL, and the metadata extracted from the lesson is attached to the result.
    """
    for lesson in channel['lessons']:
        metadata = self._extract_video_metadata(lesson)
        # The extracted metadata holds info-dict keys (display_id, channel_url, ...),
        # not the raw API fields, so the fallback URL has to be built from the
        # lesson itself; indexing metadata["class_slug"] would raise KeyError
        lesson_url = lesson.get('share_url') or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}'
        yield self.url_result(
            smuggle_url(lesson_url, {'id': lesson['id']}),
            NebulaClassIE, url_transparent=True, **metadata)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
collection_id = self._match_id(url)
|
collection_slug = self._match_id(url)
|
||||||
channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
|
channel = self._call_api(
|
||||||
channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
|
f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
|
||||||
channel_details = channel['details']
|
collection_slug, note='Retrieving channel')
|
||||||
|
|
||||||
|
if channel.get('type') == 'class':
|
||||||
|
entries = self._generate_class_entries(channel)
|
||||||
|
else:
|
||||||
|
entries = self._generate_playlist_entries(channel['id'], collection_slug)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries=self._generate_playlist_entries(collection_id, channel),
|
entries=entries,
|
||||||
playlist_id=collection_id,
|
playlist_id=collection_slug,
|
||||||
playlist_title=channel_details['title'],
|
playlist_title=channel.get('title'),
|
||||||
playlist_description=channel_details['description']
|
playlist_description=channel.get('description'))
|
||||||
)
|
|
||||||
|
|
Loading…
Reference in a new issue