mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-22 06:00:00 +00:00
[ie/podbayfm] Fix extraction (#10195)
Authored by: bashonly, seproDev
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
This commit is contained in:
parent
d6c2c2bc84
commit
d4b52ce3fc
1 changed file with 27 additions and 14 deletions
|
@ -1,28 +1,40 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
|
from ..utils import (
|
||||||
|
OnDemandPagedList,
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
jwt_decode_hs256,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
def result_from_props(props, episode_id=None):
|
def result_from_props(props):
|
||||||
return {
|
return {
|
||||||
'id': props.get('podcast_id') or episode_id,
|
**traverse_obj(props, {
|
||||||
'title': props.get('title'),
|
'id': ('_id', {str}),
|
||||||
'url': props['mediaURL'],
|
'title': ('title', {str}),
|
||||||
|
'url': ('mediaURL', {url_or_none}),
|
||||||
|
'description': ('description', {clean_html}),
|
||||||
|
'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
|
||||||
|
'timestamp': ('timestamp', {int_or_none}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
}),
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
|
'vcodec': 'none',
|
||||||
'timestamp': props.get('timestamp'),
|
|
||||||
'duration': int_or_none(props.get('duration')),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class PodbayFMIE(InfoExtractor):
|
class PodbayFMIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$'
|
_VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
|
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
|
||||||
'md5': '98b41285dcf7989d105a4ed0404054cf',
|
'md5': '895ac8505de349515f5ee8a4a3195c93',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1647338400',
|
'id': '62306451f4a48e58d0c4d6a8',
|
||||||
'title': 'Part One: Kissinger',
|
'title': 'Part One: Kissinger',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
|
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1647338400,
|
'timestamp': 1647338400,
|
||||||
'duration': 5001,
|
'duration': 5001,
|
||||||
|
@ -34,24 +46,25 @@ def _real_extract(self, url):
|
||||||
episode_id = self._match_id(url)
|
episode_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode_id)
|
webpage = self._download_webpage(url, episode_id)
|
||||||
data = self._search_nextjs_data(webpage, episode_id)
|
data = self._search_nextjs_data(webpage, episode_id)
|
||||||
return result_from_props(data['props']['pageProps']['episode'], episode_id)
|
return result_from_props(data['props']['pageProps']['episode'])
|
||||||
|
|
||||||
|
|
||||||
class PodbayFMChannelIE(InfoExtractor):
|
class PodbayFMChannelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$'
|
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://podbay.fm/p/behind-the-bastards',
|
'url': 'https://podbay.fm/p/behind-the-bastards',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'behind-the-bastards',
|
'id': 'behind-the-bastards',
|
||||||
'title': 'Behind the Bastards',
|
'title': 'Behind the Bastards',
|
||||||
},
|
},
|
||||||
|
'playlist_mincount': 21,
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 10
|
_PAGE_SIZE = 10
|
||||||
|
|
||||||
def _fetch_page(self, channel_id, pagenum):
|
def _fetch_page(self, channel_id, pagenum):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
|
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
|
||||||
channel_id)['podcast']
|
f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _results_from_page(channel_id, page):
|
def _results_from_page(channel_id, page):
|
||||||
|
|
Loading…
Reference in a new issue