# yt-dlp/yt_dlp/extractor/bbc.py

import functools
import itertools
import json
import re
import urllib.parse
import xml.etree.ElementTree

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    clean_html,
    dict_get,
    float_or_none,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    js_to_json,
    parse_duration,
    parse_iso8601,
    parse_qs,
    strip_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_timestamp,
    url_or_none,
    urlencode_postdata,
    urljoin,
)


class BBCCoUkIE(InfoExtractor):
    # Extractor for bbc.co.uk pages: programmes, iPlayer episodes/playlists,
    # music clips, the radio player and event "play" pages (see _VALID_URL).
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    # Shape of a programme/version id: one of p, b, m, l followed by 7 characters,
    # or "w" followed by 7 to 14 characters. Used by _VALID_URL and when scraping
    # "vpid" values out of the webpage.
    _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
    _VALID_URL = rf'''(?x)
                    https?://
                        (?:www\.)?bbc\.co\.uk/
                        (?:
                            programmes/(?!articles/)|
                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                            music/(?:clips|audiovideo/popular)[/#]|
                            radio/player/|
                            events/[^/]+/play/[^/]+/
                        )
                        (?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
                    '''
    # Matches an iPlayer URL handed to setPlaylist("...") in a page's script
    # (presumably consumed by the generic embed detection - confirm in InfoExtractor).
    _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']

    # Sign-in page; _perform_login fetches it, posts the form and treats ending up
    # on this URL again as a failed login.
    _LOGIN_URL = 'https://account.bbc.com/signin'
    _NETRC_MACHINE = 'bbc'

    # Filled with (media set, programme id) by _download_media_selector.
    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
    # Media sets queried, in this order, by _download_media_selector.
    _MEDIA_SETS = [
        # Provides HQ HLS streams with even better quality than the pc mediaset but fails
        # with geolocation in some cases even when the programme is not geo restricted at all
        # (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with selectionunavailable.
        'iptv-all',
        'pc',
    ]

    # XML namespace of the elements looked up in legacy (EMP) playlists.
    _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'

    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
            'info_dict': {
                'id': 'b039d07m',
                'ext': 'flv',
                'title': 'Kaleidoscope, Leonard Cohen',
                'description': 'The Canadian poet and songwriter reflects on his musical career.',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Man in Black: Series 3: The Printed Name',
                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
                'duration': 1800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Episode is no longer available on BBC iPlayer Radio',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
            'info_dict': {
                'id': 'b00yng1d',
                'ext': 'flv',
                'title': 'The Voice UK: Series 3: Blind Auditions 5',
                'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
                'duration': 5100,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        },
        {
            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
            'info_dict': {
                'id': 'b03k3pb7',
                'ext': 'flv',
                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
                'description': '2. Invasion',
                'duration': 3600,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
        }, {
            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
            'info_dict': {
                'id': 'b04v209v',
                'ext': 'flv',
                'title': 'Pete Tong, The Essential New Tune Special',
                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
                'duration': 10800,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Episode is no longer available on BBC iPlayer Radio',
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
            'note': 'Audio',
            'info_dict': {
                'id': 'p022h44j',
                'ext': 'flv',
                'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
                'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
                'duration': 227,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        }, {
            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
            'note': 'Video',
            'info_dict': {
                'id': 'p025c103',
                'ext': 'flv',
                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
                'duration': 226,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
            'info_dict': {
                'id': 'p02n76xf',
                'ext': 'flv',
                'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
                'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
            'info_dict': {
                'id': 'b05zmgw1',
                'ext': 'flv',
                'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
                'title': 'Royal Academy Summer Exhibition',
                'duration': 3540,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'geolocation',
        }, {
            # iptv-all mediaset fails with geolocation however there is no geo restriction
            # for this programme at all
            'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
            'info_dict': {
                'id': 'b06rkms3',
                'ext': 'flv',
                'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
                'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Now it\'s really geo-restricted',
        }, {
            # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
            'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
            'info_dict': {
                'id': 'p028bfkj',
                'ext': 'flv',
                'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
                'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
            'only_matching': True,
        }, {
            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
            'only_matching': True,
        }, {
            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
            'only_matching': True,
        }, {
            'url': 'https://www.bbc.co.uk/programmes/m00005xn',
            'only_matching': True,
        }, {
            'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
            'only_matching': True,
        }]

    def _perform_login(self, username, password):
        """Sign in with *username* and *password* through the BBC account form.

        The sign-in page is fetched first so that its hidden inputs and form
        action can be reused for the POST. If the response still lives on the
        sign-in URL the login is considered failed and ExtractorError is raised,
        carrying the page's ``form-message`` text when there is one.
        """
        signin_url = self._LOGIN_URL
        signin_page = self._download_webpage(
            signin_url, None, 'Downloading signin page')

        # Hidden inputs first, credentials on top of them
        payload = {
            **self._hidden_inputs(signin_page),
            'username': username,
            'password': password,
        }

        form_action = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
            'post url', default=signin_url, group='url')

        response, urlh = self._download_webpage_handle(
            urljoin(signin_url, form_action), None, 'Logging in',
            data=urlencode_postdata(payload),
            headers={'Referer': signin_url})

        if signin_url not in urlh.url:
            # Redirected away from the sign-in page: logged in
            return

        failure = clean_html(get_element_by_class('form-message', response))
        if failure:
            raise ExtractorError(f'Unable to login: {failure}', expected=True)
        raise ExtractorError('Unable to log in')

    class MediaSelectionError(Exception):
        """Signals that a media selection response carried an error ``result``."""

        def __init__(self, error_id):
            super().__init__(error_id)
            # Error code as reported in the response (e.g. 'geolocation')
            self.id = error_id

    def _extract_asx_playlist(self, connection, programme_id):
        """Download the ASX playlist of *connection* and list its entry URLs.

        Returns the ``href`` attribute of every ``Entry/ref`` element, in
        document order.
        """
        asx_doc = self._download_xml(
            connection.get('href'), programme_id, 'Downloading ASX playlist')
        entry_refs = asx_doc.findall('./Entry/ref')
        return [entry_ref.get('href') for entry_ref in entry_refs]

    def _extract_items(self, playlist):
        return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')

    def _extract_medias(self, media_selection):
        error = media_selection.get('result')
        if error:
            raise BBCCoUkIE.MediaSelectionError(error)
        return media_selection.get('media') or []

    def _extract_connections(self, media):
        return media.get('connection') or []

    def _get_subtitles(self, media, programme_id):
        subtitles = {}
        for connection in self._extract_connections(media):
            cc_url = url_or_none(connection.get('href'))
            if not cc_url:
                continue
            captions = self._download_xml(
                cc_url, programme_id, 'Downloading captions', fatal=False)
            if not isinstance(captions, xml.etree.ElementTree.Element):
                continue
            subtitles['en'] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
            ]
            break
        return subtitles

    def _raise_extractor_error(self, media_selection_error):
        """Re-raise *media_selection_error* as an expected ExtractorError naming this extractor."""
        message = f'{self.IE_NAME} returned error: {media_selection_error.id}'
        raise ExtractorError(message, expected=True)

    def _download_media_selector(self, programme_id):
        last_exception = None
        formats, subtitles = [], {}
        for media_set in self._MEDIA_SETS:
            try:
                fmts, subs = self._download_media_selector_url(
                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
                formats.extend(fmts)
                if subs:
                    self._merge_subtitles(subs, target=subtitles)
            except BBCCoUkIE.MediaSelectionError as e:
                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                    last_exception = e
                    continue
                self._raise_extractor_error(e)
        if last_exception:
            if formats or subtitles:
                self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
            else:
                self._raise_extractor_error(last_exception)
        return formats, subtitles

    def _download_media_selector_url(self, url, programme_id=None):
        """Fetch the media selection JSON at *url* and process it.

        HTTP 403 and 404 responses are accepted so that their body can still
        be examined. Returns what _process_media_selector returns.
        """
        selection = self._download_json(
            url, programme_id, 'Downloading media selection JSON',
            expected_status=(403, 404))
        result = self._process_media_selector(selection, programme_id)
        return result

    def _process_media_selector(self, media_selection, programme_id):
        formats = []
        subtitles = None
        urls = []

        for media in self._extract_medias(media_selection):
            kind = media.get('kind')
            if kind in ('video', 'audio'):
                bitrate = int_or_none(media.get('bitrate'))
                encoding = media.get('encoding')
                width = int_or_none(media.get('width'))
                height = int_or_none(media.get('height'))
                file_size = int_or_none(media.get('media_file_size'))
                for connection in self._extract_connections(media):
                    href = connection.get('href')
                    if href in urls:
                        continue
                    if href:
                        urls.append(href)
                    conn_kind = connection.get('kind')
                    protocol = connection.get('protocol')
                    supplier = connection.get('supplier')
                    transfer_format = connection.get('transferFormat')
                    format_id = supplier or conn_kind or protocol
                    # ASX playlist
                    if supplier == 'asx':
                        for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
                            formats.append({
                                'url': ref,
                                'format_id': f'ref{i}_{format_id}',
                            })
                    elif transfer_format == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            href, programme_id, mpd_id=format_id, fatal=False))
                    elif transfer_format == 'hls':
                        # TODO: let expected_status be passed into _extract_xxx_formats() instead
                        try:
                            fmts = self._extract_m3u8_formats(
                                href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                                m3u8_id=format_id, fatal=False)
                        except ExtractorError as e:
                            if not (isinstance(e.exc_info[1], HTTPError)
                                    and e.exc_info[1].status in (403, 404)):
                                raise
                            fmts = []
                        formats.extend(fmts)
                    elif transfer_format == 'hds':
                        formats.extend(self._extract_f4m_formats(
                            href, programme_id, f4m_id=format_id, fatal=False))
                    else:
                        if not supplier and bitrate:
                            format_id += f'-{bitrate}'
                        fmt = {
                            'format_id': format_id,
                            'filesize': file_size,
                        }
                        if kind == 'video':
                            fmt.update({
                                'width': width,
                                'height': height,
                                'tbr': bitrate,
                                'vcodec': encoding,
                            })
                        else:
                            fmt.update({
                                'abr': bitrate,
                                'acodec': encoding,
                                'vcodec': 'none',
                            })
                        if protocol in ('http', 'https'):
                            # Direct link
                            fmt.update({
                                'url': href,
                            })
                        elif protocol == 'rtmp':
                            application = connection.get('application', 'ondemand')
                            auth_string = connection.get('authString')
                            identifier = connection.get('identifier')
                            server = connection.get('server')
                            fmt.update({
                                'url': f'{protocol}://{server}/{application}?{auth_string}',
                                'play_path': identifier,
                                'app': f'{application}?{auth_string}',
                                'page_url': 'http://www.bbc.co.uk',
                                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                                'rtmp_live': False,
                                'ext': 'flv',
                            })
                        else:
                            continue
                        formats.append(fmt)
            elif kind == 'captions':
                subtitles = self.extract_subtitles(media, programme_id)
        return formats, subtitles

    def _download_playlist(self, playlist_id):
        """Resolve *playlist_id* through playlist.json, falling back to the legacy playlist.

        Every entry of ``allAvailableVersions`` is walked; for each item of
        kind ``programme``/``radioProgramme`` the media selector is queried
        and the resulting formats are tagged with the version's types
        (audio described versions get a lower language preference).

        Returns ``(programme_id, title, description, duration, formats,
        subtitles)``; the scalar fields come from the last programme item seen.

        Raises ExtractorError when playlist.json lists no programme item. An
        ExtractorError caused by an HTTP 404 triggers the legacy playlist
        fallback instead; any other ExtractorError propagates.
        """
        try:
            playlist = self._download_json(
                f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
                playlist_id, 'Downloading playlist JSON')
            programme_found = False
            programme_id = title = description = duration = None
            formats = []
            subtitles = {}

            # "or []" also covers an explicit null in the JSON
            for version in playlist.get('allAvailableVersions') or []:
                smp_config = version['smpConfig']
                title = smp_config['title']
                description = smp_config['summary']
                for item in smp_config['items']:
                    kind = item['kind']
                    if kind not in ('programme', 'radioProgramme'):
                        continue
                    programme_found = True
                    programme_id = item.get('vpid')
                    duration = int_or_none(item.get('duration'))
                    version_formats, version_subtitles = self._download_media_selector(programme_id)
                    types = version['types']
                    for f in version_formats:
                        f['format_note'] = ', '.join(types)
                        if any('AudioDescribed' in x for x in types):
                            f['language_preference'] = -10
                    formats += version_formats
                    for tag, subformats in (version_subtitles or {}).items():
                        subtitles.setdefault(tag, []).extend(subformats)

            if programme_found:
                return programme_id, title, description, duration, formats, subtitles
        except ExtractorError as ee:
            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
                raise
        else:
            # Without this the return above used to hit unbound locals
            raise ExtractorError(f'No programme found in playlist {playlist_id}')

        # fallback to legacy playlist
        return self._process_legacy_playlist(playlist_id)

    def _process_legacy_playlist_url(self, url, display_id):
        """Download the legacy playlist at *url* and extract the programme it describes."""
        return self._extract_from_legacy_playlist(
            self._download_legacy_playlist_url(url, display_id), display_id)

    def _process_legacy_playlist(self, playlist_id):
        """Process the legacy iPlayer playlist that belongs to *playlist_id*."""
        legacy_url = f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}'
        return self._process_legacy_playlist_url(legacy_url, playlist_id)

    def _download_legacy_playlist_url(self, url, playlist_id=None):
        """Fetch the legacy playlist XML located at *url*."""
        note = 'Downloading legacy playlist XML'
        return self._download_xml(url, playlist_id, note)

    def _extract_from_legacy_playlist(self, playlist, playlist_id):
        """Extract programme data from a legacy (EMP) playlist XML element.

        A ``noItems`` element is turned into an expected ExtractorError that
        explains why the episode is unavailable. Otherwise every item of kind
        ``programme``/``radioProgramme`` is processed: its programme id is read
        from the item's own ``identifier``/``group`` attributes or, failing
        that, from its ``mediator`` child, and the media selector is queried
        for it. An item without a usable id is itself handed to
        _process_media_selector and *playlist_id* is used as the id.

        Returns ``(programme_id, title, description, duration, formats,
        subtitles)`` for the last programme item in the playlist.

        Raises ExtractorError when the playlist has no programme item.
        """
        ns = self._EMP_PLAYLIST_NS

        no_items = playlist.find(f'./{{{ns}}}noItems')
        if no_items is not None:
            reason = no_items.get('reason')
            if reason == 'preAvailability':
                msg = f'Episode {playlist_id} is not yet available'
            elif reason == 'postAvailability':
                msg = f'Episode {playlist_id} is no longer available'
            elif reason == 'noMedia':
                msg = f'Episode {playlist_id} is not currently available'
            else:
                msg = f'Episode {playlist_id} is not available: {reason}'
            raise ExtractorError(msg, expected=True)

        def get_from_attributes(element):
            # First attribute that looks like a programme id, else None
            for attr in ('identifier', 'group'):
                value = element.get(attr)
                if value and re.match(r'^[pb][\da-z]{7}$', value):
                    return value
            return None

        def get_programme_id(item):
            # The item's own attributes take precedence; this lookup used to
            # be performed but its result was thrown away
            programme_id = get_from_attributes(item)
            if programme_id:
                return programme_id
            mediator = item.find(f'./{{{ns}}}mediator')
            if mediator is not None:
                return get_from_attributes(mediator)
            return None

        # Playlist-level metadata does not depend on the item
        title_el = playlist.find(f'./{{{ns}}}title')
        title = title_el.text if title_el is not None else None
        description_el = playlist.find(f'./{{{ns}}}summary')
        description = description_el.text if description_el is not None else None

        result = None
        for item in self._extract_items(playlist):
            kind = item.get('kind')
            if kind not in ('programme', 'radioProgramme'):
                continue

            programme_id = get_programme_id(item)
            duration = int_or_none(item.get('duration'))

            if programme_id:
                formats, subtitles = self._download_media_selector(programme_id)
            else:
                formats, subtitles = self._process_media_selector(item, playlist_id)
                programme_id = playlist_id

            result = (programme_id, title, description, duration, formats, subtitles)

        if result is None:
            # Previously surfaced as an UnboundLocalError on return
            raise ExtractorError(f'No programme found in legacy playlist {playlist_id}')
        return result

    def _real_extract(self, url):
        """Extract a single programme from a bbc.co.uk page.

        The page is scanned for an inline player error message (raised as an
        expected ExtractorError), then for a version id: first in the
        ``mediator.bind(...)`` player JSON, then in any ``"vpid"`` value. With
        a version id the media selector is queried and title/description are
        scraped from the page; without one the playlist endpoints are used
        with the id taken from the URL.
        """
        group_id = self._match_id(url)
        webpage = self._download_webpage(url, group_id, 'Downloading video page')

        page_error = self._search_regex(
            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
            webpage, 'error', default=None)
        if page_error:
            raise ExtractorError(page_error, expected=True)

        programme_id = duration = None

        player_json = self._search_regex(
            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
            webpage, 'player', default=None)
        if player_json:
            player = self._parse_json(player_json, group_id).get('player', {})
            duration = int_or_none(player.get('duration'))
            programme_id = player.get('vpid')

        if not programme_id:
            programme_id = self._search_regex(
                rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)

        if not programme_id:
            # Nothing usable in the page itself: resolve through the playlist
            (programme_id, title, description,
             duration, formats, subtitles) = self._download_playlist(group_id)
        else:
            formats, subtitles = self._download_media_selector(programme_id)
            title = self._og_search_title(webpage, default=None) or self._html_search_regex(
                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
            description = self._search_regex(
                (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
                 r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
                webpage, 'description', default=None) or self._html_search_meta('description', webpage)

        info = {
            'id': programme_id,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
        return info


class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'bbc'
    IE_DESC = 'BBC'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?:
            bbc\.(?:com|co\.uk)|
            bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
            bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
        )/(?:[^/]+/)+(?P<id>[^/#?]+)'''

    _MEDIA_SETS = [
        'pc',
        'mobile-tablet-main',
    ]

    _TESTS = [{
        # article with multiple videos embedded with data-playable containing vpids
        'url': 'http://www.bbc.com/news/world-europe-32668511',
        'info_dict': {
            'id': 'world-europe-32668511',
            'title': 'Russia stages massive WW2 parade despite Western boycott',
            'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
        },
        'playlist_count': 2,
    }, {
        # article with multiple videos embedded with data-playable (more videos)
        'url': 'http://www.bbc.com/news/business-28299555',
        'info_dict': {
            'id': 'business-28299555',
            'title': 'Farnborough Airshow: Video highlights',
            'description': 'BBC reports and video highlights at the Farnborough Airshow.',
        },
        'playlist_count': 9,
        'skip': 'Save time',
    }, {
        # article with multiple videos embedded with `new SMP()`
        # broken
        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
        'info_dict': {
            'id': '3662a707-0af9-3149-963f-47bea720b460',
            'title': 'BUGGER',
            'description': r're:BUGGER  The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
        },
        'playlist_count': 18,
    }, {
        # single video embedded with data-playable containing vpid
        'url': 'http://www.bbc.com/news/world-europe-32041533',
        'info_dict': {
            'id': 'p02mprgb',
            'ext': 'mp4',
            'title': 'Germanwings crash site aerial video',
            'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
            'duration': 47,
            'timestamp': 1427219242,
            'upload_date': '20150324',
            'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # article with single video embedded with data-playable containing XML playlist
        # with direct video links as progressiveDownloadUrl (for now these are extracted)
        # and playlist with f4m and m3u8 as streamingUrl
        'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
        'info_dict': {
            'id': '150615_telabyad_kentin_cogu',
            'ext': 'mp4',
            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
            'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
            'timestamp': 1434397334,
            'upload_date': '20150615',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'now SIMORGH_DATA with no video',
    }, {
        # single video embedded with data-playable containing XML playlists (regional section)
        'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
        'info_dict': {
            'id': '39275083',
            'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
            'ext': 'mp4',
            'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
            'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
            'timestamp': 1434713142,
            'upload_date': '20150619',
            'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # single video from video playlist embedded with vxp-playlist-data JSON
        'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
        'info_dict': {
            'id': 'p02w6qjc',
            'ext': 'mp4',
            'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
            'duration': 56,
            'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
        },
        'params': {
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }, {
        # single video story with __PWA_PRELOADED_STATE__
        'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
        'info_dict': {
            'id': 'p02q6gc4',
            'ext': 'mp4',
            'title': 'Tasting the spice of life in Jaffna',
            'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
            'timestamp': 1646058397,
            'upload_date': '20220228',
            'duration': 255,
            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
        },
    }, {
        # single video story without digitalData
        'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
        'info_dict': {
            'id': 'p018zqqg',
            'ext': 'mp4',
            'title': 'Hyundai Santa Fe Sport: Rock star',
            'description': 'md5:b042a26142c4154a6e472933cf20793d',
            'timestamp': 1415867444,
            'upload_date': '20141113',
        },
        'skip': 'redirects to TopGear home page',
    }, {
        # single video embedded with Morph
        # TODO: replacement test page
        'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
        'info_dict': {
            'id': 'p041vhd0',
            'ext': 'mp4',
            'title': "Nigeria v Japan - Men's First Round",
            'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
            'duration': 7980,
            'uploader': 'BBC Sport',
            'uploader_id': 'bbc_sport',
        },
        'skip': 'Video no longer in page',
    }, {
        # single video in __INITIAL_DATA__
        'url': 'http://www.bbc.com/sport/0/football/33653409',
        'info_dict': {
            'id': 'p02xycnp',
            'ext': 'mp4',
            'title': 'Ronaldo to Man Utd, Arsenal to spend?',
            'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
            'timestamp': 1437750175,
            'upload_date': '20150724',
            'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
            'duration': 140,
        },
    }, {
        # article with multiple videos embedded with Morph.setPayload
        'url': 'http://www.bbc.com/sport/0/football/34475836',
        'info_dict': {
            'id': '34475836',
            'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
        },
        'playlist_count': 3,
    }, {
        # Testing noplaylist
        'url': 'http://www.bbc.com/sport/0/football/34475836',
        'info_dict': {
            'id': 'p034ppnv',
            'ext': 'mp4',
            'title': 'All you need to know about Jurgen Klopp',
            'timestamp': 1444335081,
            'upload_date': '20151008',
            'duration': 122.0,
            'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        # school report article with single video
        'url': 'http://www.bbc.co.uk/schoolreport/35744779',
        'info_dict': {
            'id': '35744779',
            'title': 'School which breaks down barriers in Jerusalem',
        },
        'playlist_count': 1,
        'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
    }, {
        # single video with playlist URL from weather section
        'url': 'http://www.bbc.com/weather/features/33601775',
        'only_matching': True,
    }, {
        # custom redirection to www.bbc.com
        # also, video with window.__INITIAL_DATA__
        'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
        'info_dict': {
            'id': 'p02xzws1',
            'ext': 'mp4',
            'title': "Pluto may have 'nitrogen glaciers'",
            'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1437785037,
            'upload_date': '20150725',
            'duration': 105,
        },
    }, {
        # video with window.__INITIAL_DATA__ and value as JSON string
        'url': 'https://www.bbc.com/news/av/world-europe-59468682',
        'info_dict': {
            'id': 'p0b779gc',
            'ext': 'mp4',
            'title': 'Why France is making this woman a national hero',
            'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1638215626,
            'upload_date': '20211129',
            'duration': 125,
        },
    }, {
        # video with script id __NEXT_DATA__ and value as JSON string
        'url': 'https://www.bbc.com/news/uk-68546268',
        'info_dict': {
            'id': 'p0hj0lq7',
            'ext': 'mp4',
            'title': 'Nasser Hospital doctor describes his treatment by IDF',
            'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
            'thumbnail': r're:https?://.+/.+\.jpg',
            'timestamp': 1710188248,
            'upload_date': '20240311',
            'duration': 104,
        },
    }, {
        # single video article embedded with data-media-vpid
        'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
        'only_matching': True,
    }, {
        # bbcthreeConfig
        'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
        'info_dict': {
            'id': 'p06556y7',
            'ext': 'mp4',
            'title': 'Things Not To Say to people that live on council estates',
            'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
            'duration': 360,
            'thumbnail': r're:https?://.+/.+\.jpg',
        },
    }, {
        # window.__PRELOADED_STATE__
        'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
        'info_dict': {
            'id': 'b0b9z4vz',
            'ext': 'mp4',
            'title': 'Prom 6: An American in Paris and Turangalila',
            'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
            'uploader': 'Radio 3',
            'uploader_id': 'bbc_radio_three',
        },
        'skip': '404 Not Found',
    }, {
        'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
        'info_dict': {
            'id': 'p06w9tws',
            'ext': 'mp4',
            'title': 'md5:2fabf12a726603193a2879a055f72514',
            'description': 'Learn English words and phrases from this story',
            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
        },
        'add_ie': [BBCCoUkIE.ie_key()],
    }, {
        # BBC Reel
        'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
        'info_dict': {
            'id': 'p07c6sb9',
            'ext': 'mp4',
            'title': 'The downsides of positive thinking',
            'description': 'The downsides of positive thinking',
            'duration': 235,
            'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
            'upload_date': '20220223',
            'timestamp': 1645632746,
        },
    }, {
        # BBC Sounds
        'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
        'info_dict': {
            'id': 'p0hrw4nr',
            'ext': 'mp4',
            'title': 'Are our coastlines being washed away?',
            'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
            'timestamp': 1713556800,
            'upload_date': '20240419',
            'duration': 1588,
            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
            'uploader': 'World Service',
            'uploader_id': 'bbc_world_service',
            'series': 'CrowdScience',
            'chapters': [],
        },
    }, {  # onion routes
        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
        'only_matching': True,
    }, {
        'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Report whether this extractor should handle ``url``.

        The URL is rejected as soon as one of the extractors listed below
        accepts it; only when none of them does is the decision handed to the
        parent class's ``suitable``.
        """
        deferred_to = (
            BBCCoUkIE,
            BBCCoUkArticleIE,
            BBCCoUkIPlayerEpisodesIE,
            BBCCoUkIPlayerGroupIE,
            BBCCoUkPlaylistIE,
        )
        for extractor in deferred_to:
            if extractor.suitable(url):
                return False
        return super().suitable(url)

    def _extract_from_media_meta(self, media_meta, video_id):
        # Direct links to media in media metadata (e.g.
        # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
        # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
        source_files = media_meta.get('sourceFiles')
        if source_files:
            return [{
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            } for format_id, f in source_files.items() if f.get('url')], []

        programme_id = media_meta.get('externalId')
        if programme_id:
            return self._download_media_selector(programme_id)

        # Process playlist.sxml as legacy playlist
        href = media_meta.get('href')
        if href:
            playlist = self._download_legacy_playlist_url(href)
            _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
            return formats, subtitles

        return [], []

    def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
        """Turn a playlist.sxml URL into a single-video info dict.

        ``url`` and ``playlist_id`` are forwarded to
        ``self._process_legacy_playlist_url``; the six values it returns are
        mapped onto the ``id``, ``title``, ``description``, ``duration``,
        ``formats`` and ``subtitles`` keys. ``timestamp`` is not derived from
        the playlist: the caller's value is stored as given.
        """
        (vpid, video_title, summary, length,
         media_formats, subs) = self._process_legacy_playlist_url(url, playlist_id)
        return dict(
            id=vpid,
            title=video_title,
            description=summary,
            duration=length,
            timestamp=timestamp,
            formats=media_formats,
            subtitles=subs,
        )

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
        timestamp = json_ld_info.get('timestamp')

        playlist_title = json_ld_info.get('title') or re.sub(
            r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None

        playlist_description = json_ld_info.get(
            'description') or self._og_search_description(webpage, default=None)

        if not timestamp:
            timestamp = parse_iso8601(self._search_regex(
                [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
                 r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
                 r'"datePublished":\s*"([^"]+)'],
                webpage, 'date', default=None))

        entries = []

        # article with multiple videos embedded with playlist.sxml (e.g.
        # http://www.bbc.com/sport/0/football/34475836)
        playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
        playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
        if playlists:
            entries = [
                self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
                for playlist_url in playlists]

        # news article with multiple videos embedded with data-playable
        data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
        if data_playables:
            for _, data_playable_json in data_playables:
                data_playable = self._parse_json(
                    unescapeHTML(data_playable_json), playlist_id, fatal=False)
                if not data_playable:
                    continue
                settings = data_playable.get('settings', {})
                if settings:
                    # data-playable with video vpid in settings.playlistObject.items (e.g.
                    # http://www.bbc.com/news/world-us-canada-34473351)
                    playlist_object = settings.get('playlistObject', {})
                    if playlist_object:
                        items = playlist_object.get('items')
                        if items and isinstance(items, list):
                            title = playlist_object['title']
                            description = playlist_object.get('summary')
                            duration = int_or_none(items[0].get('duration'))
                            programme_id = items[0].get('vpid')
                            formats, subtitles = self._download_media_selector(programme_id)
                            entries.append({
                                'id': programme_id,
                                'title': title,
                                'description': description,
                                'timestamp': timestamp,
                                'duration': duration,
                                'formats': formats,
                                'subtitles': subtitles,
                            })
                    else:
                        # data-playable without vpid but with playlist.sxml URLs
                        # in otherSettings.playlist (e.g.
                        # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                        playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                        if playlist:
                            entry = None
                            for key in ('streaming', 'progressiveDownload'):
                                playlist_url = playlist.get(f'{key}Url')
                                if not playlist_url:
                                    continue
                                try:
                                    info = self._extract_from_playlist_sxml(
                                        playlist_url, playlist_id, timestamp)
                                    if not entry:
                                        entry = info
                                    else:
                                        entry['title'] = info['title']
                                        entry['formats'].extend(info['formats'])
                                except ExtractorError as e:
                                    # Some playlist URL may fail with 500, at the same time
                                    # the other one may work fine (e.g.
                                    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                                    if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                                        continue
                                    raise
                            if entry:
                                entries.append(entry)

        if entries:
            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

        # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
        group_id = self._search_regex(
            rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
            webpage, 'group id', default=None)
        if group_id:
            return self.url_result(
                f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)

        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
        programme_id = self._search_regex(
            [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
             rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
             rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
            webpage, 'vpid', default=None)

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
            # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
            digital_data = self._parse_json(
                self._search_regex(
                    r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
                programme_id, fatal=False)
            page_info = digital_data.get('page', {}).get('pageInfo', {})
            title = page_info.get('pageName') or self._og_search_title(webpage)
            description = page_info.get('description') or self._og_search_description(webpage)
            timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            }

        # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
        initial_data = self._parse_json(self._html_search_regex(
            r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
            webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
        if initial_data:
            init_data = try_get(
                initial_data, lambda x: x['initData']['items'][0], dict) or {}
            smp_data = init_data.get('smpData') or {}
            clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
            version_id = clip_data.get('versionID')
            if version_id:
                title = smp_data['title']
                formats, subtitles = self._download_media_selector(version_id)
                image_url = smp_data.get('holdingImageURL')
                display_date = init_data.get('displayDate')
                topic_title = init_data.get('topicTitle')

                return {
                    'id': version_id,
                    'title': title,
                    'formats': formats,
                    'alt_title': init_data.get('shortTitle'),
                    'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                    'description': smp_data.get('summary') or init_data.get('shortSummary'),
                    'upload_date': display_date.replace('-', '') if display_date else None,
                    'subtitles': subtitles,
                    'duration': int_or_none(clip_data.get('duration')),
                    'categories': [topic_title] if topic_title else None,
                }

        # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
        # Several setPayload calls may be present but the video(s)
        # should be in one that mentions leadMedia or videoData
        morph_payload = self._search_json(
            r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
            contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
            default={})
        if morph_payload:
            for lead_media in traverse_obj(morph_payload, (
                    'body', 'components', ..., 'props', 'leadMedia', {dict})):
                programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                return {
                    'id': programme_id,
                    'title': lead_media.get('title') or self._og_search_title(webpage),
                    **traverse_obj(lead_media, {
                        'description': ('summary', {str}),
                        'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
                        'uploader': ('masterBrand', {str}),
                        'uploader_id': ('mid', {str}),
                    }),
                    'formats': formats,
                    'subtitles': subtitles,
                }
            body = self._parse_json(traverse_obj(morph_payload, (
                'body', 'content', 'article', 'body')), playlist_id, fatal=False)
            for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
                if video_data.get('vpid'):
                    video_id = video_data['vpid']
                    formats, subtitles = self._download_media_selector(video_id)
                    entry = {
                        'id': video_id,
                        'formats': formats,
                        'subtitles': subtitles,
                    }
                else:
                    video_id = video_data['pid']
                    entry = self.url_result(
                        f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
                        video_id, url_transparent=True)
                entry.update({
                    'timestamp': traverse_obj(morph_payload, (
                        'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
                    ),
                    **traverse_obj(video_data, {
                        'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
                        'title': (('title', 'caption'), {str}, any),
                        'duration': ('duration', {parse_duration}),
                    }),
                })
                if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
                    return entry
                entries.append(entry)
            if entries:
                playlist_title = traverse_obj(morph_payload, (
                    'body', 'content', 'article', 'headline', {str})) or playlist_title
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

        # various PRELOADED_STATE JSON
        preload_state = self._search_json(
            r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
            'preload state', playlist_id, transform_source=js_to_json, default={})
        # PRELOADED_STATE with current programme
        current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
        programme_id = traverse_obj(current_programme, ('id', {str}))
        if programme_id and current_programme.get('type') == 'playable_item':
            title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            return {
                'id': programme_id,
                'title': title,
                'formats': formats,
                **traverse_obj(current_programme, {
                    'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
                    'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
                    'duration': ('duration', 'value', {int_or_none}),
                    'uploader': ('network', 'short_title', {str}),
                    'uploader_id': ('network', 'id', {str}),
                    'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
                    'series': ('titles', 'primary', {str}),
                }),
                'subtitles': subtitles,
                'chapters': traverse_obj(preload_state, (
                    'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
                        'title': ('titles', {lambda x: join_nonempty(
                            'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
                        'start_time': ('offset', 'start', {float_or_none}),
                        'end_time': ('offset', 'end', {float_or_none}),
                    }),
                ),
            }

        # PWA_PRELOADED_STATE with article video asset
        asset_id = traverse_obj(preload_state, (
            'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
            'assetVideo', 0, {str}, any))
        if asset_id:
            video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
            if video_id:
                article = traverse_obj(preload_state, (
                    'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))

                def image_url(image_id):
                    return traverse_obj(preload_state, (
                        'entities', 'images', image_id, 'url',
                        {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))

                formats, subtitles = self._download_media_selector(video_id)
                return {
                    'id': video_id,
                    **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
                        'title': ('title', {str}),
                        'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
                        'thumbnail': (0, {image_url}),
                        'duration': ('duration', {int_or_none}),
                    })),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
                }
            else:
                return self.url_result(
                    f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
                    asset_id, playlist_title, display_id=playlist_id,
                    description=playlist_description)

        bbc3_config = self._parse_json(
            self._search_regex(
                r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
                'bbcthree config', default='{}'),
            playlist_id, transform_source=js_to_json, fatal=False) or {}
        payload = bbc3_config.get('payload') or {}
        if payload:
            clip = payload.get('currentClip') or {}
            clip_vpid = clip.get('vpid')
            clip_title = clip.get('title')
            if clip_vpid and clip_title:
                formats, subtitles = self._download_media_selector(clip_vpid)
                return {
                    'id': clip_vpid,
                    'title': clip_title,
                    'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                    'description': clip.get('description'),
                    'duration': parse_duration(clip.get('duration')),
                    'formats': formats,
                    'subtitles': subtitles,
                }
            bbc3_playlist = try_get(
                payload, lambda x: x['content']['bbcMedia']['playlist'],
                dict)
            if bbc3_playlist:
                playlist_title = bbc3_playlist.get('title') or playlist_title
                thumbnail = bbc3_playlist.get('holdingImageURL')
                entries = []
                for bbc3_item in bbc3_playlist['items']:
                    programme_id = bbc3_item.get('versionID')
                    if not programme_id:
                        continue
                    formats, subtitles = self._download_media_selector(programme_id)
                    entries.append({
                        'id': programme_id,
                        'title': playlist_title,
                        'thumbnail': thumbnail,
                        'timestamp': timestamp,
                        'formats': formats,
                        'subtitles': subtitles,
                    })
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

        def parse_model(model):
            """Extract single video from model structure"""
            item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
            if not item_id:
                return
            formats, subtitles = self._download_media_selector(item_id)
            return {
                'id': item_id,
                'formats': formats,
                'subtitles': subtitles,
                **traverse_obj(model, {
                    'title': ('title', {str}),
                    'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
                    'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
                    'duration': ('versions', 0, 'duration', {int}),
                    'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
                }),
            }

        def is_type(*types):
            return lambda _, v: v['type'] in types

        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
            'quoted preload state', default=None)
        if initial_data is None:
            initial_data = self._search_regex(
                r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
                'preload state', default='{}')
        else:
            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
        if initial_data:
            for video_data in traverse_obj(initial_data, (
                    'stores', 'article', 'articleBodyContent', is_type('video'))):
                model = traverse_obj(video_data, (
                    'model', 'blocks', is_type('aresMedia'),
                    'model', 'blocks', is_type('aresMediaMetadata'),
                    'model', {dict}, any))
                entry = parse_model(model)
                if entry:
                    entries.append(entry)
            if entries:
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

            def parse_media(media):
                if not media:
                    return
                for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                    item_id = item.get('id')
                    item_title = item.get('title')
                    if not (item_id and item_title):
                        continue
                    formats, subtitles = self._download_media_selector(item_id)
                    item_desc = None
                    blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                    if blocks:
                        summary = []
                        for block in blocks:
                            text = try_get(block, lambda x: x['model']['text'], str)
                            if text:
                                summary.append(text)
                        if summary:
                            item_desc = '\n\n'.join(summary)
                    item_time = None
                    for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                        if try_get(meta, lambda x: x['label']) == 'Published':
                            item_time = unified_timestamp(meta.get('timestamp'))
                            break
                    entries.append({
                        'id': item_id,
                        'title': item_title,
                        'thumbnail': item.get('holdingImageUrl'),
                        'formats': formats,
                        'subtitles': subtitles,
                        'timestamp': item_time,
                        'description': strip_or_none(item_desc),
                        'duration': int_or_none(item.get('duration')),
                    })

            for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
                name = resp['name']
                if name == 'media-experience':
                    parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                elif name == 'article':
                    for block in traverse_obj(resp, (
                            'data', (None, ('content', 'model')), 'blocks',
                            is_type('media', 'video'), 'model', {dict})):
                        parse_media(block)
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

        # extract from SIMORGH_DATA hydration JSON
        simorgh_data = self._search_json(
            r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
            'simorgh data', playlist_id, default={})
        if simorgh_data:
            done = False
            for video_data in traverse_obj(simorgh_data, (
                    'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
                model = traverse_obj(video_data, (
                    'model', 'blocks', is_type('aresMedia'),
                    'model', 'blocks', is_type('aresMediaMetadata'),
                    'model', {dict}, any))
                if video_data['type'] == 'video':
                    entry = parse_model(model)
                else:  # legacyMedia: no duration, subtitles
                    block_id, entry = traverse_obj(model, ('blockId', {str})), None
                    media_data = traverse_obj(simorgh_data, (
                        'pageData', 'promo', 'media',
                        {lambda x: x if x['id'] == block_id else None}))
                    formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
                        'url': ('url', {url_or_none}),
                        'ext': ('format', {str}),
                        'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
                    }))
                    if formats:
                        entry = {
                            'id': block_id,
                            'display_id': playlist_id,
                            'formats': formats,
                            'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
                            **traverse_obj(model, {
                                'title': ('title', {str}),
                                'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
                                'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
                                'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
                            }),
                        }
                        done = True
                if entry:
                    entries.append(entry)
                if done:
                    break
            if entries:
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)

        def extract_all(pattern):
            return list(filter(None, (
                self._parse_json(s, playlist_id, fatal=False)
                for s in re.findall(pattern, webpage))))

        # US accessed article with single embedded video (e.g.
        # https://www.bbc.com/news/uk-68546268)
        next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
                                 ('props', 'pageProps', 'page'))
        model = traverse_obj(next_data, (
            ..., 'contents', is_type('video'),
            'model', 'blocks', is_type('media'),
            'model', 'blocks', is_type('mediaMetadata'),
            'model', {dict}, any))
        if model and (entry := parse_model(model)):
            if not entry.get('timestamp'):
                entry['timestamp'] = traverse_obj(next_data, (
                    ..., 'contents', is_type('timestamp'), 'model',
                    'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
            entries.append(entry)
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

        # Multiple video article (e.g.
        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
        EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
        entries = []
        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
            if embed_url and re.match(EMBED_URL, embed_url):
                entries.append(embed_url)
        entries.extend(re.findall(
            rf'setPlaylist\("({EMBED_URL})"\)', webpage))
        if entries:
            return self.playlist_result(
                [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
                playlist_id, playlist_title, playlist_description)

        # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
        medias = extract_all(r"data-media-meta='({[^']+})'")

        if not medias:
            # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
            media_asset = self._search_regex(
                r'mediaAssetPage\.init\(\s*({.+?}), "/',
                webpage, 'media asset', default=None)
            if media_asset:
                media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
                medias = []
                for video in media_asset_page.get('videos', {}).values():
                    medias.extend(video.values())

        if not medias:
            # Multiple video playlist with single `now playing` entry (e.g.
            # http://www.bbc.com/news/video_and_audio/must_see/33767813)
            vxp_playlist = self._parse_json(
                self._search_regex(
                    r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                    webpage, 'playlist data'),
                playlist_id)
            playlist_medias = []
            for item in vxp_playlist:
                media = item.get('media')
                if not media:
                    continue
                playlist_medias.append(media)
                # Download single video if found media with asset id matching the video id from URL
                if item.get('advert', {}).get('assetId') == playlist_id:
                    medias = [media]
                    break
            # Fallback to the whole playlist
            if not medias:
                medias = playlist_medias

        entries = []
        for num, media_meta in enumerate(medias, start=1):
            formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
            if not formats and not self.get_param('ignore_no_formats'):
                continue

            video_id = media_meta.get('externalId')
            if not video_id:
                video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'

            title = media_meta.get('caption')
            if not title:
                title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'

            duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

            images = []
            for image in media_meta.get('images', {}).values():
                images.extend(image.values())
            if 'image' in media_meta:
                images.append(media_meta['image'])

            thumbnails = [{
                'url': image.get('href'),
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            } for image in images]

            entries.append({
                'id': video_id,
                'title': title,
                'thumbnails': thumbnails,
                'duration': duration,
                'timestamp': timestamp,
                'formats': formats,
                'subtitles': subtitles,
            })

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)


class BBCCoUkArticleIE(InfoExtractor):
    # Handles bbc.co.uk/programmes/articles/<id> pages: every clip referenced
    # by the article is returned as a URL entry of a single playlist.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clips embedded in the article at ``url``."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The og:description lookup may yield None when the tag is absent;
        # strip_or_none avoids calling .strip() on None in that case.
        description = strip_or_none(self._og_search_description(webpage))

        # Each clip is marked up as <div typeof="Clip" resource="<programme url>">
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)


class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Base for HTML-scraped, paginated playlist pages. Subclasses provide
    # _VIDEO_ID_TEMPLATE (regex template taking the video ID pattern),
    # _URL_TEMPLATE (URL template taking a video ID) and
    # _extract_title_and_description(webpage).

    def _entries(self, webpage, url, playlist_id):
        """Yield a URL result for every video ID found on the playlist pages.

        Starts with the already downloaded ``webpage`` and follows the
        "next" pagination link until there is none. If ``url`` carries an
        explicit ``page`` query parameter, only that single page is processed.
        """
        single_page = 'page' in parse_qs(url)
        # The first page is already in hand, so downloaded pages are numbered from 2
        for page_num in itertools.count(2):
            for video_id in re.findall(
                    self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                break
            # Pass note/errnote by keyword: the 4th positional parameter of
            # _download_webpage is errnote, not a page number
            webpage = self._download_webpage(
                urllib.parse.urljoin(url, next_page), playlist_id,
                note=f'Downloading page {page_num}',
                errnote=f'Unable to download page {page_num}')

    def _real_extract(self, url):
        """Download the playlist page and return a lazily evaluated playlist."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)


class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    # Shared logic for API-backed iPlayer listings. Subclasses supply
    # _PAGE_SIZE, _DESCRIPTION_KEY and the hooks _call_api, _get_elements,
    # _get_episode, _get_episode_image, _get_episode_field,
    # _get_playlist_data and _get_playlist_title.
    _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` (looked up via try_get)."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Return the first available of the large/medium/small synopses."""
        synopsis = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopsis, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield URL entries for the episodes on one API page.

        ``page`` is a 0-based index; the API call is made with ``page + 1``.
        """
        elements = self._get_elements(self._call_api(
            programme_id, per_page, page + 1, series_id))
        for element in elements:
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            thumbnail = None
            image = self._get_episode_image(episode)
            if image:
                # Image URLs carry a '{recipe}' placeholder; 'raw' is substituted for it
                thumbnail = image.replace('{recipe}', 'raw')
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for an iPlayer listing URL.

        Honours the optional ``seriesId`` and ``page`` query parameters. With a
        valid ``page`` only that page is extracted; otherwise all pages are
        fetched on demand.
        """
        pid = self._match_id(url)
        qs = parse_qs(url)
        series_id = qs.get('seriesId', [None])[0]
        raw_page = qs.get('page', [None])[0]
        page = int_or_none(raw_page)
        # A non-numeric or non-positive page would previously crash with
        # ValueError or request API page 0; ignore it and extract everything
        if raw_page is not None and (page is None or page < 1):
            self.report_warning(
                f'Ignoring invalid page number {raw_page!r}; extracting all pages')
            page = None
        # NOTE(review): 36 is presumably the page size used by the website itself - confirm
        per_page = 36 if page else self._PAGE_SIZE
        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
        entries = fetch_page(page - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        # A separate one-item request is made just to obtain the playlist metadata
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        return self.playlist_result(
            entries, pid, self._get_playlist_title(playlist_data),
            self._get_description(playlist_data))


class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "episodes" listings, backed by the graph.ibl.api.bbc.co.uk endpoint
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's default image URL template, if any."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the 'default' variant of ``field`` for the episode."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the list of result elements from a programme response."""
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        """Return the episode dict nested in ``element`` (empty dict if missing)."""
        episode = element.get('episode')
        return episode if episode else {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST a persisted query and return the 'programme' part of the response."""
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
            # Only restrict to a series slice when one was requested
            **({'sliceId': series_id} if series_id else {}),
        }
        payload = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode()
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'}, data=payload)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """The programme response itself carries the playlist metadata."""
        return data

    def _get_playlist_title(self, data):
        """Return the default title of the programme."""
        return self._get_default(data, 'title')


class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "group" listings, backed by the ibl.api.bbc.co.uk groups endpoint
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's 'standard' image URL template, if any."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Fields are stored flat on the episode in group responses."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the list of elements from a group episodes response."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Each element already is the episode."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group's episodes; ``series_id`` is not used here."""
        api_url = f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes'
        response = self._download_json(
            api_url, pid, query={'page': page, 'per_page': per_page})
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Playlist metadata lives under the 'group' key of the response."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the group's title."""
        return data.get('title')


class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Playlists under bbc.co.uk/programmes/<pid>/{episodes,broadcasts,clips}
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` taken from the page's Open Graph tags."""
        # The title lookup is explicitly non-fatal; the description uses the default behaviour
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								import functools
-												[bbc:playlist] Add support for pagination (Closes #10349)

											
										
										
											2016-08-15 21:36:23 +00:00
+								import itertools
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								import json
-												[bbccouk] Make subtitles non fatal (#19651)

											
										
										
											2019-02-24 14:01:25 +00:00
+								import re
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								import urllib.parse
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								import xml.etree.ElementTree
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
-												[bbccouk] Convert to new subtitles system

I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is build, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles is inside 'span' elements.

											
										
										
											2015-02-19 15:46:41 +00:00
+								from .common import InfoExtractor
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								from ..networking.exceptions import HTTPError
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
+								from ..utils import (
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								    ExtractorError,
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								    OnDemandPagedList,
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								    clean_html,
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								    dict_get,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    float_or_none,
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								    get_element_by_class,
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
+								    int_or_none,
-												[ie/bbc] Extract tracklist as chapters (#7788)

Authored by: garret1317
											
										
										
											2023-09-16 22:47:49 +00:00
+								    join_nonempty,
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								    js_to_json,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    parse_duration,
 								    parse_iso8601,
-												[utils] Add `parse_qs`

											
										
										
											2021-08-22 19:02:00 +00:00
+								    parse_qs,
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								    strip_or_none,
-												[ie/bbc] Extract tracklist as chapters (#7788)

Authored by: garret1317
											
										
										
											2023-09-16 22:47:49 +00:00
+								    traverse_obj,
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								    try_get,
-												[bbc] Add support for videos in news articles embedded with data-playable

											
										
										
											2015-10-10 14:34:06 +00:00
+								    unescapeHTML,
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								    unified_timestamp,
-												[bbccouk] Make subtitles non fatal (#19651)

											
										
										
											2019-02-24 14:01:25 +00:00
+								    url_or_none,
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								    urlencode_postdata,
 								    urljoin,
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
+								)
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
-												[bbccouk] Convert to new subtitles system

I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is build, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles is inside 'span' elements.

											
										
										
											2015-02-19 15:46:41 +00:00
+								class BBCCoUkIE(InfoExtractor):
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								    IE_NAME = 'bbc.co.uk'
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								    IE_DESC = 'BBC iPlayer'
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								    _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    _VALID_URL = rf'''(?x)
-												[bbccouk] Extend _VALID_URL (Closes #8116)

											
										
										
											2016-01-02 13:22:39 +00:00
+								                    https?://
 								                        (?:www\.)?bbc\.co\.uk/
 								                        (?:
 								                            programmes/(?!articles/)|
 								                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
-												[bbccouk] Extend _VALID_URL

											
										
										
											2017-06-29 15:29:28 +00:00
+								                            music/(?:clips|audiovideo/popular)[/#]|
-												[bbccouk] Add support for events URLs (closes #13893)

											
										
										
											2017-08-19 16:54:15 +00:00
+								                            radio/player/|
 								                            events/[^/]+/play/[^/]+/
-												[bbccouk] Extend _VALID_URL (Closes #8116)

											
										
										
											2016-01-02 13:22:39 +00:00
+								                        )
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                        (?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
 								                    '''
-												[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend
on the signature of the current functions. So it is left as-is for now

Note: Tests have not been migrated

											
										
										
											2022-08-01 01:23:25 +00:00
+								    _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								    _LOGIN_URL = 'https://account.bbc.com/signin'
 								    _NETRC_MACHINE = 'bbc'
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
 								    _MEDIA_SETS = [
-												[bbc] Clarify iptv-all mediaset rationale

											
										
										
											2015-09-26 14:06:21 +00:00
+								        # Provides HQ HLS streams with even better quality than pc mediaset but fails
 								        # with geolocation in some cases when it's even not geo restricted at all (e.g.
-												[bbc] Allow selectionunavailable errors (Closes #7502)

											
										
										
											2015-11-14 17:08:13 +00:00
+								        # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								        'iptv-all',
 								        'pc',
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								    ]
-												BBCNewsIE: eliminate redundant function.
BBCCoUkIE._download_media_selector: use class variable instead of
hardcoded string for mediaselector_url template.

											
										
										
											2015-06-19 06:52:25 +00:00
-												[bbc] Extract legacy playlist embedded media

											
										
										
											2015-10-10 17:01:20 +00:00
+								    _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								    _TESTS = [
 								        {
-												[bbccouk] Replace test

This older episode is from 1994 and hopefully won't get deleted.
											
										
										
											2014-02-18 23:46:14 +00:00
+								            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								            'info_dict': {
-												[bbccouk] Replace test

This older episode is from 1994 and hopefully won't get deleted.
											
										
										
											2014-02-18 23:46:14 +00:00
+								                'id': 'b039d07m',
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                'ext': 'flv',
-												[bbc] Fix some tests

											
										
										
											2019-08-26 15:04:38 +00:00
+								                'title': 'Kaleidoscope, Leonard Cohen',
-												[bbccouk] Update test

											
										
										
											2015-01-02 16:13:26 +00:00
+								                'description': 'The Canadian poet and songwriter reflects on his musical career.',
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								            },
 								            'params': {
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                # rtmp download
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								                'skip_download': True,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            },
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								        },
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								        {
 								            'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
 								            'info_dict': {
 								                'id': 'b00yng1d',
 								                'ext': 'flv',
 								                'title': 'The Man in Black: Series 3: The Printed Name',
 								                'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
 								                'duration': 1800,
 								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
-												[bbccouk] Skip test
											
										
										
											2014-02-17 17:26:12 +00:00
+								            },
 								            'skip': 'Episode is no longer available on BBC iPlayer Radio',
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								        },
 								        {
 								            'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
 								            'info_dict': {
 								                'id': 'b00yng1d',
 								                'ext': 'flv',
-												[bbc.co.uk] Fix TV episode test
											
										
										
											2014-02-08 21:04:21 +00:00
+								                'title': 'The Voice UK: Series 3: Blind Auditions 5',
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								                'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
-												[bbc.co.uk] Fix TV episode test
											
										
										
											2014-02-08 21:04:21 +00:00
+								                'duration': 5100,
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
 								            },
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								        },
 								        {
 								            'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
 								            'info_dict': {
 								                'id': 'b03k3pb7',
 								                'ext': 'flv',
 								                'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
 								                'description': '2. Invasion',
 								                'duration': 3600,
 								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
 								            },
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								            'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
 								            'info_dict': {
 								                'id': 'b04v209v',
 								                'ext': 'flv',
 								                'title': 'Pete Tong, The Essential New Tune Special',
 								                'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
 								                'duration': 10800,
 								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
-												[bbc.co.uk] Skip removed test video

											
										
										
											2015-12-05 08:51:13 +00:00
+								            },
 								            'skip': 'Episode is no longer available on BBC iPlayer Radio',
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								        }, {
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								            'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								            'note': 'Audio',
 								            'info_dict': {
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								                'id': 'p022h44j',
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                'ext': 'flv',
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								                'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
 								                'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
 								                'duration': 227,
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								            },
 								            'params': {
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                # rtmp download
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								                'skip_download': True,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            },
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
 								            'note': 'Video',
 								            'info_dict': {
 								                'id': 'p025c103',
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                'ext': 'flv',
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								                'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
 								                'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
 								                'duration': 226,
 								            },
 								            'params': {
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                # rtmp download
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								                'skip_download': True,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            },
-												[bbccouk] Add test for #5530

											
										
										
											2015-04-30 22:02:56 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
 								            'info_dict': {
 								                'id': 'p02n76xf',
 								                'ext': 'flv',
 								                'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
 								                'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
 								                'duration': 3540,
 								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
 								            },
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								            'skip': 'geolocation',
-												[bbccouk] Fix description (closes #6006)

											
										
										
											2015-06-18 14:00:13 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
 								            'info_dict': {
 								                'id': 'b05zmgw1',
 								                'ext': 'flv',
 								                'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
 								                'title': 'Royal Academy Summer Exhibition',
 								                'duration': 3540,
 								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
 								            },
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								            'skip': 'geolocation',
-												[bbc] Add test for programme that fails with iptv-all mediaset

											
										
										
											2015-09-26 14:07:12 +00:00
+								        }, {
 								            # iptv-all mediaset fails with geolocation however there is no geo restriction
 								            # for this programme at all
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								            'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
-												[bbc] Add test for programme that fails with iptv-all mediaset

											
										
										
											2015-09-26 14:07:12 +00:00
+								            'info_dict': {
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								                'id': 'b06rkms3',
-												[bbc] Add test for programme that fails with iptv-all mediaset

											
										
										
											2015-09-26 14:07:12 +00:00
+								                'ext': 'flv',
-												[bbccouk] Update tests (Closes #8090)

											
										
										
											2016-01-03 20:55:25 +00:00
+								                'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
 								                'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
-												[bbc] Add test for programme that fails with iptv-all mediaset

											
										
										
											2015-09-26 14:07:12 +00:00
+								            },
 								            'params': {
 								                # rtmp download
 								                'skip_download': True,
 								            },
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								            'skip': 'Now it\'s really geo-restricted',
-												[bbc] Add test for #8147

											
										
										
											2016-01-28 17:27:48 +00:00
+								        }, {
-												Start moving to ytdl-org

											
										
										
											2019-03-09 12:14:41 +00:00
+								            # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
-												[bbc] Add test for #8147

											
										
										
											2016-01-28 17:27:48 +00:00
+								            'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
 								            'info_dict': {
 								                'id': 'p028bfkj',
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                'ext': 'flv',
-												[bbc] Add test for #8147

											
										
										
											2016-01-28 17:27:48 +00:00
+								                'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
 								                'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
 								            },
 								            'params': {
-												Revert "[bbc] extract more and better qulities from Unified Streaming Platform m3u8 manifests"

This reverts commit 0385aa6199206e4ba7745efec73be26c5826286a.

											
										
										
											2016-07-17 10:29:36 +00:00
+								                # rtmp download
-												[bbc] Add test for #8147

											
										
										
											2016-01-28 17:27:48 +00:00
+								                'skip_download': True,
 								            },
-												[bbccouk] Improve _VALID_URL

											
										
										
											2015-01-02 14:37:54 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
 								            'only_matching': True,
-												[bbccouk] Add support for music clips (Closes #4143)

											
										
										
											2015-01-03 14:43:40 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
 								            'only_matching': True,
-												[bbccouk] Improve _VALID_URL

											
										
										
											2015-01-30 17:47:09 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
 								            'only_matching': True,
-												[bbccouk] Extend _VALID_URL (Closes #8116)

											
										
										
											2016-01-02 13:22:39 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
 								            'only_matching': True,
-												[bbccouk] Extend _VALID_URL

											
										
										
											2017-06-29 15:29:28 +00:00
+								        }, {
 								            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
 								            'only_matching': True,
-												[bbccouk] Add support for w-prefixed ids (closes #14056)

											
										
										
											2017-08-29 22:27:56 +00:00
+								        }, {
 								            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
 								            'only_matching': True,
-												[bbccouk] Extend _ID_REGEX (closes #17270)

											
										
										
											2018-08-19 19:05:07 +00:00
+								        }, {
 								            'url': 'https://www.bbc.co.uk/programmes/m00005xn',
 								            'only_matching': True,
 								        }, {
 								            'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
 								            'only_matching': True,
-												[bbccouk] Extend _VALID_URL

											
										
										
											2017-06-29 15:29:28 +00:00
+								        }]
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
-												[extractor] Add `_perform_login` function (#2943)

* Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API
* Adds `ie.supports_login` to the public API
											
										
										
											2022-03-18 20:53:33 +00:00
+								    def _perform_login(self, username, password):
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								        login_page = self._download_webpage(
 								            self._LOGIN_URL, None, 'Downloading signin page')
 								        login_form = self._hidden_inputs(login_page)
 								        login_form.update({
 								            'username': username,
 								            'password': password,
 								        })
 								        post_url = urljoin(self._LOGIN_URL, self._search_regex(
 								            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
 								            'post url', default=self._LOGIN_URL, group='url'))
 								        response, urlh = self._download_webpage_handle(
 								            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
 								            headers={'Referer': self._LOGIN_URL})
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								        if self._LOGIN_URL in urlh.url:
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								            error = clean_html(get_element_by_class('form-message', response))
 								            if error:
 								                raise ExtractorError(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                    f'Unable to login: {error}', expected=True)
-												[bbc] Add support for authentication

											
										
										
											2017-05-26 15:12:24 +00:00
+								            raise ExtractorError('Unable to log in')
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								    class MediaSelectionError(Exception):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        def __init__(self, error_id):
 								            self.id = error_id
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								    def _extract_asx_playlist(self, connection, programme_id):
 								        asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
 								        return [ref.get('href') for ref in asx.findall('./Entry/ref')]
 								    def _extract_items(self, playlist):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
-												[bbc] Extract legacy playlist embedded media

											
										
										
											2015-10-10 17:01:20 +00:00
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								    def _extract_medias(self, media_selection):
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								        error = media_selection.get('result')
 								        if error:
 								            raise BBCCoUkIE.MediaSelectionError(error)
 								        return media_selection.get('media') or []
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
 								    def _extract_connections(self, media):
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								        return media.get('connection') or []
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
-												[bbccouk] Convert to new subtitles system

I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is build, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles is inside 'span' elements.

											
										
										
											2015-02-19 15:46:41 +00:00
+								    def _get_subtitles(self, media, programme_id):
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								        subtitles = {}
 								        for connection in self._extract_connections(media):
-												[bbccouk] Make subtitles non fatal (#19651)

											
										
										
											2019-02-24 14:01:25 +00:00
+								            cc_url = url_or_none(connection.get('href'))
 								            if not cc_url:
 								                continue
 								            captions = self._download_xml(
 								                cc_url, programme_id, 'Downloading captions', fatal=False)
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            if not isinstance(captions, xml.etree.ElementTree.Element):
-												[bbccouk] Make subtitles non fatal (#19651)

											
										
										
											2019-02-24 14:01:25 +00:00
+								                continue
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								            subtitles['en'] = [
-												[bbccouk] Convert to new subtitles system

I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is build, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles is inside 'span' elements.

											
										
										
											2015-02-19 15:46:41 +00:00
+								                {
 								                    'url': connection.get('href'),
 								                    'ext': 'ttml',
 								                },
 								            ]
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								            break
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								        return subtitles
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								    def _raise_extractor_error(self, media_selection_error):
 								        raise ExtractorError(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            f'{self.IE_NAME} returned error: {media_selection_error.id}',
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								            expected=True)
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								    def _download_media_selector(self, programme_id):
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								        last_exception = None
-												[ie/bbc] Extract more formats (#8321)

Closes #4902
Authored by: barsnick, dirkf
											
										
										
											2023-12-21 20:47:32 +00:00
+								        formats, subtitles = [], {}
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								        for media_set in self._MEDIA_SETS:
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								            try:
-												[ie/bbc] Extract more formats (#8321)

Closes #4902
Authored by: barsnick, dirkf
											
										
										
											2023-12-21 20:47:32 +00:00
+								                fmts, subs = self._download_media_selector_url(
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
-												[ie/bbc] Extract more formats (#8321)

Closes #4902
Authored by: barsnick, dirkf
											
										
										
											2023-12-21 20:47:32 +00:00
+								                formats.extend(fmts)
 								                if subs:
 								                    self._merge_subtitles(subs, target=subtitles)
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								            except BBCCoUkIE.MediaSelectionError as e:
-												[bbc] Allow selectionunavailable errors (Closes #7502)

											
										
										
											2015-11-14 17:08:13 +00:00
+								                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								                    last_exception = e
 								                    continue
 								                self._raise_extractor_error(e)
-												[ie/bbc] Extract more formats (#8321)

Closes #4902
Authored by: barsnick, dirkf
											
										
										
											2023-12-21 20:47:32 +00:00
+								        if last_exception:
 								            if formats or subtitles:
 								                self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
 								            else:
 								                self._raise_extractor_error(last_exception)
 								        return formats, subtitles
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								    def _download_media_selector_url(self, url, programme_id=None):
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								        media_selection = self._download_json(
 								            url, programme_id, 'Downloading media selection JSON',
-												[bbccouk] Use expected_status

											
										
										
											2018-06-17 21:04:47 +00:00
+								            expected_status=(403, 404))
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        return self._process_media_selector(media_selection, programme_id)
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    def _process_media_selector(self, media_selection, programme_id):
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								        formats = []
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								        subtitles = None
-												[bbc] reduce requests and improve format_id

											
										
										
											2016-08-06 18:24:59 +00:00
+								        urls = []
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								        for media in self._extract_medias(media_selection):
 								            kind = media.get('kind')
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								            if kind in ('video', 'audio'):
 								                bitrate = int_or_none(media.get('bitrate'))
 								                encoding = media.get('encoding')
 								                width = int_or_none(media.get('width'))
 								                height = int_or_none(media.get('height'))
 								                file_size = int_or_none(media.get('media_file_size'))
 								                for connection in self._extract_connections(media):
-												[bbc] reduce requests and improve format_id

											
										
										
											2016-08-06 18:24:59 +00:00
+								                    href = connection.get('href')
 								                    if href in urls:
 								                        continue
 								                    if href:
 								                        urls.append(href)
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                    conn_kind = connection.get('kind')
 								                    protocol = connection.get('protocol')
 								                    supplier = connection.get('supplier')
 								                    transfer_format = connection.get('transferFormat')
 								                    format_id = supplier or conn_kind or protocol
 								                    # ASX playlist
 								                    if supplier == 'asx':
 								                        for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
 								                            formats.append({
 								                                'url': ref,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                                'format_id': f'ref{i}_{format_id}',
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                            })
 								                    elif transfer_format == 'dash':
 								                        formats.extend(self._extract_mpd_formats(
 								                            href, programme_id, mpd_id=format_id, fatal=False))
 								                    elif transfer_format == 'hls':
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								                        # TODO: let expected_status be passed into _extract_xxx_formats() instead
 								                        try:
 								                            fmts = self._extract_m3u8_formats(
 								                                href, programme_id, ext='mp4', entry_protocol='m3u8_native',
 								                                m3u8_id=format_id, fatal=False)
 								                        except ExtractorError as e:
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								                            if not (isinstance(e.exc_info[1], HTTPError)
 								                                    and e.exc_info[1].status in (403, 404)):
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								                                raise
 								                            fmts = []
 								                        formats.extend(fmts)
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                    elif transfer_format == 'hds':
 								                        formats.extend(self._extract_f4m_formats(
 								                            href, programme_id, f4m_id=format_id, fatal=False))
 								                    else:
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								                        if not supplier and bitrate:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                            format_id += f'-{bitrate}'
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                        fmt = {
 								                            'format_id': format_id,
 								                            'filesize': file_size,
 								                        }
 								                        if kind == 'video':
 								                            fmt.update({
 								                                'width': width,
 								                                'height': height,
-												[bbccouk] Treat bitrate as audio+video bitrate in media selector

											
										
										
											2017-04-10 15:56:22 +00:00
+								                                'tbr': bitrate,
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                                'vcodec': encoding,
 								                            })
 								                        else:
 								                            fmt.update({
 								                                'abr': bitrate,
 								                                'acodec': encoding,
 								                                'vcodec': 'none',
 								                            })
-												[bbccouk] Add support for https protocol in media selector (closes #12701)

											
										
										
											2017-04-10 15:53:06 +00:00
+								                        if protocol in ('http', 'https'):
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                            # Direct link
 								                            fmt.update({
 								                                'url': href,
 								                            })
 								                        elif protocol == 'rtmp':
 								                            application = connection.get('application', 'ondemand')
 								                            auth_string = connection.get('authString')
 								                            identifier = connection.get('identifier')
 								                            server = connection.get('server')
 								                            fmt.update({
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                                'url': f'{protocol}://{server}/{application}?{auth_string}',
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                                'play_path': identifier,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                                'app': f'{application}?{auth_string}',
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                                'page_url': 'http://www.bbc.co.uk',
 								                                'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
 								                                'rtmp_live': False,
 								                                'ext': 'flv',
 								                            })
-												[bbccouk] Skip unrecognized formats in media selector (#12701)

											
										
										
											2017-04-10 15:53:51 +00:00
+								                        else:
 								                            continue
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                        formats.append(fmt)
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								            elif kind == 'captions':
-												[bbccouk] Convert to new subtitles system

I haven't found any video available outside the UK, so I haven't added tests.

I have updated how the srt file is build, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles is inside 'span' elements.

											
										
										
											2015-02-19 15:46:41 +00:00
+								                subtitles = self.extract_subtitles(media, programme_id)
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								        return formats, subtitles
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								    def _download_playlist(self, playlist_id):
 								        try:
 								            playlist = self._download_json(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								                playlist_id, 'Downloading playlist JSON')
-												[bbc] Get all available formats (#1717)

Authored by: nyuszika7h
											
										
										
											2021-11-19 14:57:01 +00:00
+								            formats = []
 								            subtitles = {}
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
-												[bbc] Get all available formats (#1717)

Authored by: nyuszika7h
											
										
										
											2021-11-19 14:57:01 +00:00
+								            for version in playlist.get('allAvailableVersions', []):
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								                smp_config = version['smpConfig']
 								                title = smp_config['title']
 								                description = smp_config['summary']
 								                for item in smp_config['items']:
 								                    kind = item['kind']
-												improve coding style

											
										
										
											2017-04-12 19:38:43 +00:00
+								                    if kind not in ('programme', 'radioProgramme'):
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								                        continue
 								                    programme_id = item.get('vpid')
-												[bbccouk] Make more robust (Closes #6345)

											
										
										
											2015-07-23 20:56:54 +00:00
+								                    duration = int_or_none(item.get('duration'))
-												[bbc] Get all available formats (#1717)

Authored by: nyuszika7h
											
										
										
											2021-11-19 14:57:01 +00:00
+								                    version_formats, version_subtitles = self._download_media_selector(programme_id)
 								                    types = version['types']
 								                    for f in version_formats:
 								                        f['format_note'] = ', '.join(types)
 								                        if any('AudioDescribed' in x for x in types):
 								                            f['language_preference'] = -10
 								                    formats += version_formats
 								                    for tag, subformats in (version_subtitles or {}).items():
-												[cleanup] Misc cleanup
Closes #1805, closes #1800

											
										
										
											2021-11-28 21:22:52 +00:00
+								                        subtitles.setdefault(tag, []).extend(subformats)
-												[bbc] Get all available formats (#1717)

Authored by: nyuszika7h
											
										
										
											2021-11-19 14:57:01 +00:00
 								            return programme_id, title, description, duration, formats, subtitles
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								        except ExtractorError as ee:
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								                raise
 								        # fallback to legacy playlist
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        return self._process_legacy_playlist(playlist_id)
 								    def _process_legacy_playlist_url(self, url, display_id):
 								        playlist = self._download_legacy_playlist_url(url, display_id)
 								        return self._extract_from_legacy_playlist(playlist, display_id)
 								    def _process_legacy_playlist(self, playlist_id):
 								        return self._process_legacy_playlist_url(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								    def _download_legacy_playlist_url(self, url, playlist_id=None):
 								        return self._download_xml(
 								            url, playlist_id, 'Downloading legacy playlist XML')
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    def _extract_from_legacy_playlist(self, playlist, playlist_id):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								        if no_items is not None:
 								            reason = no_items.get('reason')
 								            if reason == 'preAvailability':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                msg = f'Episode {playlist_id} is not yet available'
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								            elif reason == 'postAvailability':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                msg = f'Episode {playlist_id} is no longer available'
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								            elif reason == 'noMedia':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                msg = f'Episode {playlist_id} is not currently available'
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								            else:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                msg = f'Episode {playlist_id} is not available: {reason}'
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								            raise ExtractorError(msg, expected=True)
 								        for item in self._extract_items(playlist):
 								            kind = item.get('kind')
-												improve coding style

											
										
										
											2017-04-12 19:38:43 +00:00
+								            if kind not in ('programme', 'radioProgramme'):
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								                continue
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
 								            description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
-												[bbc] Fix FutureWarning

											
										
										
											2015-10-16 18:26:45 +00:00
+								            description = description_el.text if description_el is not None else None
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								            def get_programme_id(item):
 								                def get_from_attributes(item):
-												[bbccouk] PEP8

											
										
										
											2020-05-13 22:11:42 +00:00
+								                    for p in ('identifier', 'group'):
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                        value = item.get(p)
 								                        if value and re.match(r'^[pb][\da-z]{7}$', value):
 								                            return value
 								                get_from_attributes(item)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                if mediator is not None:
 								                    return get_from_attributes(mediator)
 								            programme_id = get_programme_id(item)
-												[bbccouk] Make more robust (Closes #6345)

											
										
										
											2015-07-23 20:56:54 +00:00
+								            duration = int_or_none(item.get('duration'))
-												[bbc] Extract legacy playlist embedded media

											
										
										
											2015-10-10 17:01:20 +00:00
 								            if programme_id:
 								                formats, subtitles = self._download_media_selector(programme_id)
 								            else:
 								                formats, subtitles = self._process_media_selector(item, playlist_id)
 								                programme_id = playlist_id
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
 								        return programme_id, title, description, duration, formats, subtitles
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								    def _real_extract(self, url):
 								        group_id = self._match_id(url)
 								        webpage = self._download_webpage(url, group_id, 'Downloading video page')
-												[bbccouk] Capture and output error message (closes #13518)

											
										
										
											2017-06-29 15:27:53 +00:00
+								        error = self._search_regex(
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
-												[bbccouk] Capture and output error message (closes #13518)

											
										
										
											2017-06-29 15:27:53 +00:00
+								            webpage, 'error', default=None)
 								        if error:
 								            raise ExtractorError(error, expected=True)
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
+								        programme_id = None
-												[bbc.co.uk] Fix test_BBCCoUk

This is similar to the one in #7756, So also fixes #7756.

											
										
										
											2015-12-05 08:45:24 +00:00
+								        duration = None
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
 								        tviplayer = self._search_regex(
 								            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
 								            webpage, 'player', default=None)
 								        if tviplayer:
 								            player = self._parse_json(tviplayer, group_id).get('player', {})
 								            duration = int_or_none(player.get('duration'))
 								            programme_id = player.get('vpid')
 								        if not programme_id:
 								            programme_id = self._search_regex(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								        if programme_id:
 								            formats, subtitles = self._download_media_selector(programme_id)
-												[bbccouk] Extend title extraction

											
										
										
											2016-01-02 13:42:11 +00:00
+								            title = self._og_search_title(webpage, default=None) or self._html_search_regex(
-												[bbc] Add another title regex (Closes #8340)

											
										
										
											2016-01-28 17:19:53 +00:00
+								                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
 								                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
-												[bbccouk] Improve extraction (Closes #5530)

											
										
										
											2015-04-30 21:59:13 +00:00
+								            description = self._search_regex(
-												[bbc] Add another description regex

											
										
										
											2016-01-28 17:23:13 +00:00
+								                (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
 								                 r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
-												[bbc.co.uk] Fix test_BBCCoUk

This is similar to the one in #7756, So also fixes #7756.

											
										
										
											2015-12-05 08:45:24 +00:00
+								                webpage, 'description', default=None)
 								            if not description:
 								                description = self._html_search_meta('description', webpage)
-												[bbccouk] Fix extraction (#4104, #4214)

											
										
										
											2014-11-30 16:37:56 +00:00
+								        else:
-												[bbccouk] Switch to new JSON playlist format (Closes #4588)

											
										
										
											2014-12-28 21:00:24 +00:00
+								            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								        return {
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								            'id': programme_id,
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								            'title': title,
 								            'description': description,
-												[bbccouk] Mute thumbnail

											
										
										
											2015-04-30 22:07:30 +00:00
+								            'thumbnail': self._og_search_thumbnail(webpage, default=None),
-												[bbc.co.uk] Add support for bbc.co.uk radio programmes (Closes #2184)
											
										
										
											2014-02-08 14:55:28 +00:00
+								            'duration': duration,
 								            'formats': formats,
-												[bbc.co.uk] Improve overall extractor structure, add subtitles support
(#2184)

Everything from http://www.bbc.co.uk/iplayer/ should be downloadable
now.
											
										
										
											2014-02-08 21:00:24 +00:00
+								            'subtitles': subtitles,
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
+								        }
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
-												[cleanup] Misc

Closes #5541

											
										
										
											2022-11-16 00:57:43 +00:00
+								class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    IE_NAME = 'bbc'
 								    IE_DESC = 'BBC'
-												[extractor/bbc] Support onion domains (#5211)

Authored by: DoubleCouponDay
											
										
										
											2022-11-04 15:25:17 +00:00
+								    _VALID_URL = r'''(?x)
 								        https?://(?:www\.)?(?:
 								            bbc\.(?:com|co\.uk)|
 								            bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
 								            bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
 								        )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
-												Update to ytdl-2021.01.03

											
										
										
											2021-01-01 12:26:37 +00:00
+								    _MEDIA_SETS = [
 								        'pc',
-												[bbc] Extract better quality videos (#1113)

mobile-tablet-main only provides 540p25, so it shouldn't be used for the first attempt. Instead pc provides up to 720p50

Authored by: ajj8
											
										
										
											2021-09-28 22:37:33 +00:00
+								        'mobile-tablet-main',
-												[bbc] Improve work with mediaselection URLs

											
										
										
											2015-07-29 18:55:06 +00:00
+								    ]
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
 								    _TESTS = [{
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # article with multiple videos embedded with data-playable containing vpids
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        'url': 'http://www.bbc.com/news/world-europe-32668511',
 								        'info_dict': {
 								            'id': 'world-europe-32668511',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'title': 'Russia stages massive WW2 parade despite Western boycott',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        },
 								        'playlist_count': 2,
-												bbc.py: correct syntax

											
										
										
											2015-07-17 07:47:02 +00:00
+								    }, {
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # article with multiple videos embedded with data-playable (more videos)
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        'url': 'http://www.bbc.com/news/business-28299555',
 								        'info_dict': {
 								            'id': 'business-28299555',
 								            'title': 'Farnborough Airshow: Video highlights',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'description': 'BBC reports and video highlights at the Farnborough Airshow.',
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        },
 								        'playlist_count': 9,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        'skip': 'Save time',
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								    }, {
 								        # article with multiple videos embedded with `new SMP()`
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # broken
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
 								        'info_dict': {
 								            'id': '3662a707-0af9-3149-963f-47bea720b460',
-												[bbc] Update test

											
										
										
											2016-03-13 09:56:34 +00:00
+								            'title': 'BUGGER',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'description': r're:BUGGER  The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        },
 								        'playlist_count': 18,
-												bbc.py: correct syntax

											
										
										
											2015-07-17 07:47:02 +00:00
+								    }, {
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # single video embedded with data-playable containing vpid
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        'url': 'http://www.bbc.com/news/world-europe-32041533',
 								        'info_dict': {
 								            'id': 'p02mprgb',
-												[bbc] Prefer iptv-all mediaset

											
										
										
											2015-10-10 18:37:39 +00:00
+								            'ext': 'mp4',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'title': 'Germanwings crash site aerial video',
 								            'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								            'duration': 47,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'timestamp': 1427219242,
-												Fix tests, description formatting

											
										
										
											2015-06-25 05:31:32 +00:00
+								            'upload_date': '20150324',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								        },
 								        'params': {
 								            'skip_download': True,
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        },
-												bbc.py: correct syntax

											
										
										
											2015-07-17 07:47:02 +00:00
+								    }, {
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # article with single video embedded with data-playable containing XML playlist
 								        # with direct video links as progressiveDownloadUrl (for now these are extracted)
 								        # and playlist with f4m and m3u8 as streamingUrl
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								        'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
 								        'info_dict': {
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'id': '150615_telabyad_kentin_cogu',
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								            'ext': 'mp4',
-												[bbc] fix test

											
										
										
											2016-08-06 18:36:12 +00:00
+								            'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
-												[bbc] Improve extraction from sxml playlists

											
										
										
											2016-07-19 15:49:38 +00:00
+								            'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'timestamp': 1434397334,
-												Fix tests, description formatting

											
										
										
											2015-06-25 05:31:32 +00:00
+								            'upload_date': '20150615',
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								        },
 								        'params': {
 								            'skip_download': True,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        },
 								        'skip': 'now SIMORGH_DATA with no video',
-												[bbc] Add another test

											
										
										
											2015-10-10 17:14:25 +00:00
+								    }, {
-												[bbc] Update tests

											
										
										
											2015-10-10 17:56:55 +00:00
+								        # single video embedded with data-playable containing XML playlists (regional section)
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								        'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
 								        'info_dict': {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'id': '39275083',
 								            'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								            'ext': 'mp4',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'timestamp': 1434713142,
-												Fix tests, description formatting

											
										
										
											2015-06-25 05:31:32 +00:00
+								            'upload_date': '20150619',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
+								        },
 								        'params': {
 								            'skip_download': True,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        },
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								    }, {
 								        # single video from video playlist embedded with vxp-playlist-data JSON
 								        'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
 								        'info_dict': {
 								            'id': 'p02w6qjc',
-												[bbc] Prefer iptv-all mediaset

											
										
										
											2015-10-10 18:37:39 +00:00
+								            'ext': 'mp4',
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								            'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
 								            'duration': 56,
-												[bbc] Fix test_BBC_6

											
										
										
											2015-12-05 08:54:25 +00:00
+								            'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								        },
 								        'params': {
 								            'skip_download': True,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        },
 								        'skip': '404 Not Found',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    }, {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # single video story with __PWA_PRELOADED_STATE__
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
 								        'info_dict': {
 								            'id': 'p02q6gc4',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'ext': 'mp4',
 								            'title': 'Tasting the spice of life in Jaffna',
 								            'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
 								            'timestamp': 1646058397,
 								            'upload_date': '20220228',
 								            'duration': 255,
 								            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        },
 								    }, {
 								        # single video story without digitalData
 								        'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
 								        'info_dict': {
 								            'id': 'p018zqqg',
-												[bbc] Prefer iptv-all mediaset

											
										
										
											2015-10-10 18:37:39 +00:00
+								            'ext': 'mp4',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'title': 'Hyundai Santa Fe Sport: Rock star',
 								            'description': 'md5:b042a26142c4154a6e472933cf20793d',
-												[bbc] Extract article JSON and actualize tests

											
										
										
											2015-10-10 18:25:09 +00:00
+								            'timestamp': 1415867444,
 								            'upload_date': '20141113',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        },
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        'skip': 'redirects to TopGear home page',
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								    }, {
 								        # single video embedded with Morph
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # TODO: replacement test page
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								        'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
 								        'info_dict': {
 								            'id': 'p041vhd0',
 								            'ext': 'mp4',
 								            'title': "Nigeria v Japan - Men's First Round",
 								            'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
 								            'duration': 7980,
 								            'uploader': 'BBC Sport',
 								            'uploader_id': 'bbc_sport',
 								        },
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        'skip': 'Video no longer in page',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    }, {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # single video in __INITIAL_DATA__
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        'url': 'http://www.bbc.com/sport/0/football/33653409',
 								        'info_dict': {
 								            'id': 'p02xycnp',
-												[bbc] Prefer iptv-all mediaset

											
										
										
											2015-10-10 18:37:39 +00:00
+								            'ext': 'mp4',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'title': 'Ronaldo to Man Utd, Arsenal to spend?',
 								            'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
 								            'timestamp': 1437750175,
 								            'upload_date': '20150724',
 								            'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            'duration': 140,
 								        },
-												[bbc] Add test for atricle with multiple videos embedded with playlist.sxml

											
										
										
											2015-10-10 14:55:46 +00:00
+								    }, {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # article with multiple videos embedded with Morph.setPayload
-												[bbc] Add test for atricle with multiple videos embedded with playlist.sxml

											
										
										
											2015-10-10 14:55:46 +00:00
+								        'url': 'http://www.bbc.com/sport/0/football/34475836',
 								        'info_dict': {
 								            'id': '34475836',
-												[bbc] Update test

											
										
										
											2016-03-13 09:59:54 +00:00
+								            'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
-												[bbc] Fix a test

											
										
										
											2016-04-16 10:00:19 +00:00
+								            'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
-												[bbc] Add test for atricle with multiple videos embedded with playlist.sxml

											
										
										
											2015-10-10 14:55:46 +00:00
+								        },
 								        'playlist_count': 3,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								    }, {
 								        # Testing noplaylist
 								        'url': 'http://www.bbc.com/sport/0/football/34475836',
 								        'info_dict': {
 								            'id': 'p034ppnv',
 								            'ext': 'mp4',
 								            'title': 'All you need to know about Jurgen Klopp',
 								            'timestamp': 1444335081,
 								            'upload_date': '20151008',
 								            'duration': 122.0,
 								            'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
 								        },
 								        'params': {
 								            'noplaylist': True,
 								        },
-												[bbc] Update test

											
										
										
											2016-03-13 09:59:54 +00:00
+								    }, {
 								        # school report article with single video
 								        'url': 'http://www.bbc.co.uk/schoolreport/35744779',
 								        'info_dict': {
 								            'id': '35744779',
 								            'title': 'School which breaks down barriers in Jerusalem',
 								        },
 								        'playlist_count': 1,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    }, {
 								        # single video with playlist URL from weather section
 								        'url': 'http://www.bbc.com/weather/features/33601775',
 								        'only_matching': True,
 								    }, {
 								        # custom redirection to www.bbc.com
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								        # also, video with window.__INITIAL_DATA__
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								        'info_dict': {
 								            'id': 'p02xzws1',
 								            'ext': 'mp4',
 								            'title': "Pluto may have 'nitrogen glaciers'",
 								            'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
 								            'thumbnail': r're:https?://.+/.+\.jpg',
 								            'timestamp': 1437785037,
 								            'upload_date': '20150725',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'duration': 105,
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								        },
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								    }, {
 								        # video with window.__INITIAL_DATA__ and value as JSON string
 								        'url': 'https://www.bbc.com/news/av/world-europe-59468682',
 								        'info_dict': {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'id': 'p0b779gc',
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								            'ext': 'mp4',
 								            'title': 'Why France is making this woman a national hero',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								            'thumbnail': r're:https?://.+/.+\.jpg',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'timestamp': 1638215626,
 								            'upload_date': '20211129',
 								            'duration': 125,
 								        },
 								    }, {
 								        # video with script id __NEXT_DATA__ and value as JSON string
 								        'url': 'https://www.bbc.com/news/uk-68546268',
 								        'info_dict': {
 								            'id': 'p0hj0lq7',
 								            'ext': 'mp4',
 								            'title': 'Nasser Hospital doctor describes his treatment by IDF',
 								            'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
 								            'thumbnail': r're:https?://.+/.+\.jpg',
 								            'timestamp': 1710188248,
 								            'upload_date': '20240311',
 								            'duration': 104,
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								        },
-												[bbc] Extend vpid regex (Closes #9003)

											
										
										
											2016-03-27 17:22:51 +00:00
+								    }, {
 								        # single video article embedded with data-media-vpid
 								        'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
 								        'only_matching': True,
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								    }, {
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								        # bbcthreeConfig
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								        'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
 								        'info_dict': {
 								            'id': 'p06556y7',
 								            'ext': 'mp4',
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								            'title': 'Things Not To Say to people that live on council estates',
 								            'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
 								            'duration': 360,
 								            'thumbnail': r're:https?://.+/.+\.jpg',
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								        },
-												[bbc] add support for BBC Radio Play pages(closes #17022)

											
										
										
											2018-07-21 10:49:55 +00:00
+								    }, {
 								        # window.__PRELOADED_STATE__
 								        'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
 								        'info_dict': {
 								            'id': 'b0b9z4vz',
 								            'ext': 'mp4',
 								            'title': 'Prom 6: An American in Paris and Turangalila',
 								            'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
 								            'uploader': 'Radio 3',
 								            'uploader_id': 'bbc_radio_three',
 								        },
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        'skip': '404 Not Found',
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
+								    }, {
 								        'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
 								        'info_dict': {
 								            'id': 'p06w9tws',
 								            'ext': 'mp4',
 								            'title': 'md5:2fabf12a726603193a2879a055f72514',
 								            'description': 'Learn English words and phrases from this story',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
+								        },
 								        'add_ie': [BBCCoUkIE.ie_key()],
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								    }, {
 								        # BBC Reel
 								        'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
 								        'info_dict': {
 								            'id': 'p07c6sb9',
 								            'ext': 'mp4',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'title': 'The downsides of positive thinking',
 								            'description': 'The downsides of positive thinking',
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								            'duration': 235,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
 								            'upload_date': '20220223',
 								            'timestamp': 1645632746,
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								        },
-												[ie/bbc] Extract tracklist as chapters (#7788)

Authored by: garret1317
											
										
										
											2023-09-16 22:47:49 +00:00
+								    }, {
 								        # BBC Sounds
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
-												[ie/bbc] Extract tracklist as chapters (#7788)

Authored by: garret1317
											
										
										
											2023-09-16 22:47:49 +00:00
+								        'info_dict': {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'id': 'p0hrw4nr',
-												[ie/bbc] Extract tracklist as chapters (#7788)

Authored by: garret1317
											
										
										
											2023-09-16 22:47:49 +00:00
+								            'ext': 'mp4',
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            'title': 'Are our coastlines being washed away?',
 								            'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
 								            'timestamp': 1713556800,
 								            'upload_date': '20240419',
 								            'duration': 1588,
 								            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
 								            'uploader': 'World Service',
 								            'uploader_id': 'bbc_world_service',
 								            'series': 'CrowdScience',
 								            'chapters': [],
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        },
-												[extractor/bbc] Support onion domains (#5211)

Authored by: DoubleCouponDay
											
										
										
											2022-11-04 15:25:17 +00:00
+								    }, {  # onion routes
 								        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
 								        'only_matching': True,
 								    }, {
 								        'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
 								        'only_matching': True,
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								    }]
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								    @classmethod
 								    def suitable(cls, url):
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
-												[bbccouk] Add support for playlists (Closes #9812)

											
										
										
											2016-06-17 16:42:52 +00:00
+								        return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                else super().suitable(url))
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								    def _extract_from_media_meta(self, media_meta, video_id):
 								        # Direct links to media in media metadata (e.g.
 								        # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
 								        # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
 								        source_files = media_meta.get('sourceFiles')
 								        if source_files:
 								            return [{
 								                'url': f['url'],
 								                'format_id': format_id,
 								                'ext': f.get('encoding'),
 								                'tbr': float_or_none(f.get('bitrate'), 1000),
 								                'filesize': int_or_none(f.get('filesize')),
 								            } for format_id, f in source_files.items() if f.get('url')], []
 								        programme_id = media_meta.get('externalId')
 								        if programme_id:
 								            return self._download_media_selector(programme_id)
 								        # Process playlist.sxml as legacy playlist
 								        href = media_meta.get('href')
 								        if href:
 								            playlist = self._download_legacy_playlist_url(href)
 								            _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
 								            return formats, subtitles
 								        return [], []
-												[bbc] Add one more scenario for data-playable embeds

											
										
										
											2015-10-10 15:32:27 +00:00
+								    def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
 								        programme_id, title, description, duration, formats, subtitles = \
 								            self._process_legacy_playlist_url(url, playlist_id)
 								        return {
 								            'id': programme_id,
 								            'title': title,
 								            'description': description,
 								            'duration': duration,
 								            'timestamp': timestamp,
 								            'formats': formats,
 								            'subtitles': subtitles,
 								        }
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								    def _real_extract(self, url):
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        playlist_id = self._match_id(url)
 								        webpage = self._download_webpage(url, playlist_id)
-												[bbc] Add proper default to _search_json_ld call

											
										
										
											2016-08-08 15:44:36 +00:00
+								        json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
-												[bbc] Use _search_json_ld

											
										
										
											2016-01-16 12:46:28 +00:00
+								        timestamp = json_ld_info.get('timestamp')
-												[bbc] Improve title and description extraction (Closes #8826, closes #8822)

											
										
										
											2016-03-13 09:54:56 +00:00
-												[extractor] Improve `_generic_title`

											
										
										
											2022-10-31 12:05:20 +00:00
+								        playlist_title = json_ld_info.get('title') or re.sub(
 								            r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
-												[bbc] Improve title and description extraction (Closes #8826, closes #8822)

											
										
										
											2016-03-13 09:54:56 +00:00
 								        playlist_description = json_ld_info.get(
 								            'description') or self._og_search_description(webpage, default=None)
-												[bbc] Extract article JSON and actualize tests

											
										
										
											2015-10-10 18:25:09 +00:00
 								        if not timestamp:
 								            timestamp = parse_iso8601(self._search_regex(
 								                [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
 								                 r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
-												[bbc] PEP 8

											
										
										
											2015-10-10 18:39:28 +00:00
+								                 r'"datePublished":\s*"([^"]+)'],
-												[bbc] Extract article JSON and actualize tests

											
										
										
											2015-10-10 18:25:09 +00:00
+								                webpage, 'date', default=None))
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								        entries = []
-												[bbc] Support multiple videos in articles embedded with playlist.sxml

											
										
										
											2015-10-10 14:40:56 +00:00
+								        # article with multiple videos embedded with playlist.sxml (e.g.
 								        # http://www.bbc.com/sport/0/football/34475836)
 								        playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
-												[bbc] Add another pattern for playlist.sxml (Closes #7743)

											
										
										
											2015-12-06 10:41:12 +00:00
+								        playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
-												[bbc] Support multiple videos in articles embedded with playlist.sxml

											
										
										
											2015-10-10 14:40:56 +00:00
+								        if playlists:
-												[bbc] Add one more scenario for data-playable embeds

											
										
										
											2015-10-10 15:32:27 +00:00
+								            entries = [
 								                self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
 								                for playlist_url in playlists]
-												Support BBC news in other languages, non-mediaselector videos

											
										
										
											2015-06-20 16:04:46 +00:00
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								        # news article with multiple videos embedded with data-playable
 								        data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
 								        if data_playables:
 								            for _, data_playable_json in data_playables:
 								                data_playable = self._parse_json(
 								                    unescapeHTML(data_playable_json), playlist_id, fatal=False)
 								                if not data_playable:
 								                    continue
-												[bbc] Add one more scenario for data-playable embeds

											
										
										
											2015-10-10 15:32:27 +00:00
+								                settings = data_playable.get('settings', {})
 								                if settings:
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								                    # data-playable with video vpid in settings.playlistObject.items (e.g.
 								                    # http://www.bbc.com/news/world-us-canada-34473351)
-												[bbc] Add one more scenario for data-playable embeds

											
										
										
											2015-10-10 15:32:27 +00:00
+								                    playlist_object = settings.get('playlistObject', {})
 								                    if playlist_object:
 								                        items = playlist_object.get('items')
 								                        if items and isinstance(items, list):
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								                            title = playlist_object['title']
 								                            description = playlist_object.get('summary')
-												[bbc] Add one more scenario for data-playable embeds

											
										
										
											2015-10-10 15:32:27 +00:00
+								                            duration = int_or_none(items[0].get('duration'))
 								                            programme_id = items[0].get('vpid')
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								                            formats, subtitles = self._download_media_selector(programme_id)
 								                            entries.append({
 								                                'id': programme_id,
 								                                'title': title,
 								                                'description': description,
 								                                'timestamp': timestamp,
 								                                'duration': duration,
 								                                'formats': formats,
 								                                'subtitles': subtitles,
 								                            })
 								                    else:
 								                        # data-playable without vpid but with a playlist.sxml URLs
 								                        # in otherSettings.playlist (e.g.
 								                        # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
 								                        playlist = data_playable.get('otherSettings', {}).get('playlist', {})
 								                        if playlist:
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                            entry = None
 								                            for key in ('streaming', 'progressiveDownload'):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                                playlist_url = playlist.get(f'{key}Url')
-												[bbc] Improve extraction from sxml playlists

											
										
										
											2016-07-19 15:49:38 +00:00
+								                                if not playlist_url:
 								                                    continue
 								                                try:
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                                    info = self._extract_from_playlist_sxml(
 								                                        playlist_url, playlist_id, timestamp)
 								                                    if not entry:
 								                                        entry = info
 								                                    else:
 								                                        entry['title'] = info['title']
 								                                        entry['formats'].extend(info['formats'])
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								                                except ExtractorError as e:
-												[bbc] Improve extraction from sxml playlists

											
										
										
											2016-07-19 15:49:38 +00:00
+								                                    # Some playlist URL may fail with 500, at the same time
 								                                    # the other one may work fine (e.g.
 								                                    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								                                    if isinstance(e.cause, HTTPError) and e.cause.status == 500:
-												[bbc] Improve extraction from sxml playlists

											
										
										
											2016-07-19 15:49:38 +00:00
+								                                        continue
 								                                    raise
-												[bbc] improve extraction

- extract f4m and dash formats
- improve format sorting and listing
- improve extraction of articles with `otherSettings.playlist`

											
										
										
											2016-08-06 17:48:09 +00:00
+								                            if entry:
 								                                entries.append(entry)
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
 								        if entries:
 								            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
+								        # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
 								        group_id = self._search_regex(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
+								            webpage, 'group id', default=None)
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								        if group_id:
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
+								            return self.url_result(
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
-												[bbc] Add support for another embed pattern (closes #18643)

											
										
										
											2018-12-31 16:20:40 +00:00
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
 								        programme_id = self._search_regex(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
 								             rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
 								             rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
-												[bbc] Support playlists of data-playable

											
										
										
											2015-10-10 17:40:20 +00:00
+								            webpage, 'vpid', default=None)
-												[bbc] Add support for videos in news articles embedded with data-playable

											
										
										
											2015-10-10 14:34:06 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        if programme_id:
 								            formats, subtitles = self._download_media_selector(programme_id)
 								            # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
 								            digital_data = self._parse_json(
 								                self._search_regex(
 								                    r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
 								                programme_id, fatal=False)
 								            page_info = digital_data.get('page', {}).get('pageInfo', {})
 								            title = page_info.get('pageName') or self._og_search_title(webpage)
 								            description = page_info.get('description') or self._og_search_description(webpage)
 								            timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
 								            return {
 								                'id': programme_id,
 								                'title': title,
 								                'description': description,
 								                'timestamp': timestamp,
 								                'formats': formats,
 								                'subtitles': subtitles,
 								            }
-												bbc.py: correct syntax

											
										
										
											2015-07-17 07:47:02 +00:00
-												Update to ytdl-2021.03.03

											
										
										
											2021-03-03 05:49:33 +00:00
+								        # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
 								        initial_data = self._parse_json(self._html_search_regex(
 								            r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
 								            webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
 								        if initial_data:
 								            init_data = try_get(
 								                initial_data, lambda x: x['initData']['items'][0], dict) or {}
 								            smp_data = init_data.get('smpData') or {}
 								            clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
 								            version_id = clip_data.get('versionID')
 								            if version_id:
 								                title = smp_data['title']
 								                formats, subtitles = self._download_media_selector(version_id)
 								                image_url = smp_data.get('holdingImageURL')
 								                display_date = init_data.get('displayDate')
 								                topic_title = init_data.get('topicTitle')
 								                return {
 								                    'id': version_id,
 								                    'title': title,
 								                    'formats': formats,
 								                    'alt_title': init_data.get('shortTitle'),
 								                    'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
 								                    'description': smp_data.get('summary') or init_data.get('shortSummary'),
 								                    'upload_date': display_date.replace('-', '') if display_date else None,
 								                    'subtitles': subtitles,
 								                    'duration': int_or_none(clip_data.get('duration')),
 								                    'categories': [topic_title] if topic_title else None,
 								                }
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								        # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # Several setPayload calls may be present but the video(s)
 								        # should be in one that mentions leadMedia or videoData
 								        morph_payload = self._search_json(
 								            r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
 								            contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
 								            default={})
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								        if morph_payload:
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            for lead_media in traverse_obj(morph_payload, (
 								                    'body', 'components', ..., 'props', 'leadMedia', {dict})):
 								                programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								                if not programme_id:
 								                    continue
 								                formats, subtitles = self._download_media_selector(programme_id)
 								                return {
 								                    'id': programme_id,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                    'title': lead_media.get('title') or self._og_search_title(webpage),
 								                    **traverse_obj(lead_media, {
 								                        'description': ('summary', {str}),
 								                        'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
 								                        'uploader': ('masterBrand', {str}),
 								                        'uploader_id': ('mid', {str}),
 								                    }),
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
+								                    'formats': formats,
 								                    'subtitles': subtitles,
 								                }
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            body = self._parse_json(traverse_obj(morph_payload, (
 								                'body', 'content', 'article', 'body')), playlist_id, fatal=False)
 								            for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
 								                if video_data.get('vpid'):
 								                    video_id = video_data['vpid']
 								                    formats, subtitles = self._download_media_selector(video_id)
 								                    entry = {
 								                        'id': video_id,
 								                        'formats': formats,
 								                        'subtitles': subtitles,
 								                    }
 								                else:
 								                    video_id = video_data['pid']
 								                    entry = self.url_result(
 								                        f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
 								                        video_id, url_transparent=True)
 								                entry.update({
 								                    'timestamp': traverse_obj(morph_payload, (
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                        'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                    ),
 								                    **traverse_obj(video_data, {
 								                        'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
 								                        'title': (('title', 'caption'), {str}, any),
 								                        'duration': ('duration', {parse_duration}),
 								                    }),
 								                })
 								                if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
 								                    return entry
 								                entries.append(entry)
 								            if entries:
 								                playlist_title = traverse_obj(morph_payload, (
 								                    'body', 'content', 'article', 'headline', {str})) or playlist_title
 								                return self.playlist_result(
 								                    entries, playlist_id, playlist_title, playlist_description)
-												[bbc] Add support for morph embeds (Closes #10239)

											
										
										
											2016-08-07 11:01:50 +00:00
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # various PRELOADED_STATE JSON
 								        preload_state = self._search_json(
 								            r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
 								            'preload state', playlist_id, transform_source=js_to_json, default={})
 								        # PRELOADED_STATE with current programme
 								        current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
 								        programme_id = traverse_obj(current_programme, ('id', {str}))
 								        if programme_id and current_programme.get('type') == 'playable_item':
 								            title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
 								            formats, subtitles = self._download_media_selector(programme_id)
 								            return {
 								                'id': programme_id,
 								                'title': title,
 								                'formats': formats,
 								                **traverse_obj(current_programme, {
 								                    'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
 								                    'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
 								                    'duration': ('duration', 'value', {int_or_none}),
 								                    'uploader': ('network', 'short_title', {str}),
 								                    'uploader_id': ('network', 'id', {str}),
 								                    'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
 								                    'series': ('titles', 'primary', {str}),
 								                }),
 								                'subtitles': subtitles,
 								                'chapters': traverse_obj(preload_state, (
 								                    'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
 								                        'title': ('titles', {lambda x: join_nonempty(
 								                            'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
 								                        'start_time': ('offset', 'start', {float_or_none}),
 								                        'end_time': ('offset', 'end', {float_or_none}),
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                    }),
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                ),
 								            }
 								        # PWA_PRELOADED_STATE with article video asset
 								        asset_id = traverse_obj(preload_state, (
 								            'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
 								            'assetVideo', 0, {str}, any))
 								        if asset_id:
 								            video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
 								            if video_id:
 								                article = traverse_obj(preload_state, (
 								                    'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
 								                def image_url(image_id):
 								                    return traverse_obj(preload_state, (
 								                        'entities', 'images', image_id, 'url',
 								                        {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
 								                formats, subtitles = self._download_media_selector(video_id)
-												[bbc] add support for BBC Radio Play pages(closes #17022)

											
										
										
											2018-07-21 10:49:55 +00:00
+								                return {
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                    'id': video_id,
 								                    **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
 								                        'title': ('title', {str}),
 								                        'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
 								                        'thumbnail': (0, {image_url}),
 								                        'duration': ('duration', {int_or_none}),
 								                    })),
-												[bbc] add support for BBC Radio Play pages(closes #17022)

											
										
										
											2018-07-21 10:49:55 +00:00
+								                    'formats': formats,
 								                    'subtitles': subtitles,
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                    'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
-												[bbc] add support for BBC Radio Play pages(closes #17022)

											
										
										
											2018-07-21 10:49:55 +00:00
+								                }
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            else:
 								                return self.url_result(
 								                    f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
 								                    asset_id, playlist_title, display_id=playlist_id,
 								                    description=playlist_description)
-												[bbc] add support for BBC Radio Play pages(closes #17022)

											
										
										
											2018-07-21 10:49:55 +00:00
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								        bbc3_config = self._parse_json(
 								            self._search_regex(
 								                r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
 								                'bbcthree config', default='{}'),
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								            playlist_id, transform_source=js_to_json, fatal=False) or {}
 								        payload = bbc3_config.get('payload') or {}
 								        if payload:
 								            clip = payload.get('currentClip') or {}
 								            clip_vpid = clip.get('vpid')
 								            clip_title = clip.get('title')
 								            if clip_vpid and clip_title:
 								                formats, subtitles = self._download_media_selector(clip_vpid)
 								                return {
 								                    'id': clip_vpid,
 								                    'title': clip_title,
 								                    'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
 								                    'description': clip.get('description'),
 								                    'duration': parse_duration(clip.get('duration')),
 								                    'formats': formats,
 								                    'subtitles': subtitles,
 								                }
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								            bbc3_playlist = try_get(
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								                payload, lambda x: x['content']['bbcMedia']['playlist'],
-												[bbc] Add support for bbcthree (closes #16612)

											
										
										
											2018-06-02 21:07:59 +00:00
+								                dict)
 								            if bbc3_playlist:
 								                playlist_title = bbc3_playlist.get('title') or playlist_title
 								                thumbnail = bbc3_playlist.get('holdingImageURL')
 								                entries = []
 								                for bbc3_item in bbc3_playlist['items']:
 								                    programme_id = bbc3_item.get('versionID')
 								                    if not programme_id:
 								                        continue
 								                    formats, subtitles = self._download_media_selector(programme_id)
 								                    entries.append({
 								                        'id': programme_id,
 								                        'title': playlist_title,
 								                        'thumbnail': thumbnail,
 								                        'timestamp': timestamp,
 								                        'formats': formats,
 								                        'subtitles': subtitles,
 								                    })
 								                return self.playlist_result(
 								                    entries, playlist_id, playlist_title, playlist_description)
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        def parse_model(model):
 								            """Extract single video from model structure"""
 								            item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
 								            if not item_id:
 								                return
 								            formats, subtitles = self._download_media_selector(item_id)
 								            return {
 								                'id': item_id,
 								                'formats': formats,
 								                'subtitles': subtitles,
 								                **traverse_obj(model, {
 								                    'title': ('title', {str}),
 								                    'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
 								                    'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
 								                    'duration': ('versions', 0, 'duration', {int}),
 								                    'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                }),
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            }
 								        def is_type(*types):
 								            return lambda _, v: v['type'] in types
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								        initial_data = self._search_regex(
 								            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
 								            'quoted preload state', default=None)
 								        if initial_data is None:
 								            initial_data = self._search_regex(
 								                r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
-												[ie/bbc] Fix JSON parsing bug

Authored by: bashonly

											
										
										
											2023-12-21 20:46:00 +00:00
+								                'preload state', default='{}')
-												Update to ytdl-commit-6508688

Make default upload_/release_date a compat_str
https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a

Except:
* "[NDR] Overhaul NDR and NJoy extractors" https://github.com/ytdl-org/youtube-dl/pull/30531
    - https://github.com/ytdl-org/youtube-dl/commit/01824d275bfa7efbaca274b38c1ddc2b03f12f5d
    - https://github.com/ytdl-org/youtube-dl/commit/39a98b09a2acf50dc64bc41185be723b98e740b9
    - https://github.com/ytdl-org/youtube-dl/commit/f0a05a55c2ee512880546c056cfbec5ad3399798
    - https://github.com/ytdl-org/youtube-dl/commit/4186e817772d49d6f66b07c5ac8c248f026a6446

											
										
										
											2022-03-04 17:01:04 +00:00
+								        else:
 								            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
 								        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								        if initial_data:
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								            for video_data in traverse_obj(initial_data, (
 								                    'stores', 'article', 'articleBodyContent', is_type('video'))):
 								                model = traverse_obj(video_data, (
 								                    'model', 'blocks', is_type('aresMedia'),
 								                    'model', 'blocks', is_type('aresMediaMetadata'),
 								                    'model', {dict}, any))
 								                entry = parse_model(model)
 								                if entry:
 								                    entries.append(entry)
 								            if entries:
 								                return self.playlist_result(
 								                    entries, playlist_id, playlist_title, playlist_description)
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								            def parse_media(media):
 								                if not media:
 								                    return
 								                for item in (try_get(media, lambda x: x['media']['items'], list) or []):
 								                    item_id = item.get('id')
 								                    item_title = item.get('title')
 								                    if not (item_id and item_title):
 								                        continue
 								                    formats, subtitles = self._download_media_selector(item_id)
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								                    item_desc = None
 								                    blocks = try_get(media, lambda x: x['summary']['blocks'], list)
 								                    if blocks:
 								                        summary = []
 								                        for block in blocks:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                            text = try_get(block, lambda x: x['model']['text'], str)
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								                            if text:
 								                                summary.append(text)
 								                        if summary:
 								                            item_desc = '\n\n'.join(summary)
 								                    item_time = None
 								                    for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
 								                        if try_get(meta, lambda x: x['label']) == 'Published':
 								                            item_time = unified_timestamp(meta.get('timestamp'))
 								                            break
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								                    entries.append({
 								                        'id': item_id,
 								                        'title': item_title,
 								                        'thumbnail': item.get('holdingImageUrl'),
 								                        'formats': formats,
 								                        'subtitles': subtitles,
-												Update to ytdl-commit-7e8b3f9

[youtube] Remove unused code
https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438

											
										
										
											2021-04-22 09:02:54 +00:00
+								                        'timestamp': item_time,
 								                        'description': strip_or_none(item_desc),
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                        'duration': int_or_none(item.get('duration')),
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								                    })
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
 								            for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
 								                name = resp['name']
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								                if name == 'media-experience':
 								                    parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
 								                elif name == 'article':
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								                    for block in traverse_obj(resp, (
 								                            'data', (None, ('content', 'model')), 'blocks',
 								                            is_type('media', 'video'), 'model', {dict})):
 								                        parse_media(block)
-												Updated to release 2020.11.26

											
										
										
											2020-11-26 17:27:34 +00:00
+								            return self.playlist_result(
 								                entries, playlist_id, playlist_title, playlist_description)
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # extract from SIMORGH_DATA hydration JSON
 								        simorgh_data = self._search_json(
 								            r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
 								            'simorgh data', playlist_id, default={})
 								        if simorgh_data:
 								            done = False
 								            for video_data in traverse_obj(simorgh_data, (
 								                    'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
 								                model = traverse_obj(video_data, (
 								                    'model', 'blocks', is_type('aresMedia'),
 								                    'model', 'blocks', is_type('aresMediaMetadata'),
 								                    'model', {dict}, any))
 								                if video_data['type'] == 'video':
 								                    entry = parse_model(model)
 								                else:  # legacyMedia: no duration, subtitles
 								                    block_id, entry = traverse_obj(model, ('blockId', {str})), None
 								                    media_data = traverse_obj(simorgh_data, (
 								                        'pageData', 'promo', 'media',
 								                        {lambda x: x if x['id'] == block_id else None}))
 								                    formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
 								                        'url': ('url', {url_or_none}),
 								                        'ext': ('format', {str}),
 								                        'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
 								                    }))
 								                    if formats:
 								                        entry = {
 								                            'id': block_id,
 								                            'display_id': playlist_id,
 								                            'formats': formats,
 								                            'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
 								                            **traverse_obj(model, {
 								                                'title': ('title', {str}),
 								                                'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
 								                                'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
 								                                'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
 								                            }),
 								                        }
 								                        done = True
 								                if entry:
 								                    entries.append(entry)
 								                if done:
 								                    break
 								            if entries:
 								                return self.playlist_result(
 								                    entries, playlist_id, playlist_title, playlist_description)
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        def extract_all(pattern):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            return list(filter(None, (
 								                self._parse_json(s, playlist_id, fatal=False)
 								                for s in re.findall(pattern, webpage))))
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
-												[ie/bbc] Fix and extend extraction (#9705)

Closes #9701
Authored by: kylegustavo, dirkf, pukkandan
											
										
										
											2024-05-17 06:20:13 +00:00
+								        # US accessed article with single embedded video (e.g.
 								        # https://www.bbc.com/news/uk-68546268)
 								        next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
 								                                 ('props', 'pageProps', 'page'))
 								        model = traverse_obj(next_data, (
 								            ..., 'contents', is_type('video'),
 								            'model', 'blocks', is_type('media'),
 								            'model', 'blocks', is_type('mediaMetadata'),
 								            'model', {dict}, any))
 								        if model and (entry := parse_model(model)):
 								            if not entry.get('timestamp'):
 								                entry['timestamp'] = traverse_obj(next_data, (
 								                    ..., 'contents', is_type('timestamp'), 'model',
 								                    'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
 								            entries.append(entry)
 								            return self.playlist_result(
 								                entries, playlist_id, playlist_title, playlist_description)
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        # Multiple video article (e.g.
 								        # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        entries = []
 								        for match in extract_all(r'new\s+SMP\(({.+?})\)'):
 								            embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
 								            if embed_url and re.match(EMBED_URL, embed_url):
 								                entries.append(embed_url)
 								        entries.extend(re.findall(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            rf'setPlaylist\("({EMBED_URL})"\)', webpage))
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        if entries:
 								            return self.playlist_result(
-												[bbc] PEP 8

											
										
										
											2016-08-07 11:05:13 +00:00
+								                [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								                playlist_id, playlist_title, playlist_description)
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								        # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
-												[bbc] Add support for direct bbc.co.uk embeds

											
										
										
											2015-07-27 16:05:51 +00:00
+								        medias = extract_all(r"data-media-meta='({[^']+})'")
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								        if not medias:
 								            # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								            media_asset = self._search_regex(
 								                r'mediaAssetPage\.init\(\s*({.+?}), "/',
 								                webpage, 'media asset', default=None)
 								            if media_asset:
 								                media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
 								                medias = []
 								                for video in media_asset_page.get('videos', {}).values():
 								                    medias.extend(video.values())
 								        if not medias:
 								            # Multiple video playlist with single `now playing` entry (e.g.
 								            # http://www.bbc.com/news/video_and_audio/must_see/33767813)
 								            vxp_playlist = self._parse_json(
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                self._search_regex(
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								                    r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
 								                    webpage, 'playlist data'),
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                playlist_id)
-												[bbc] Add support for vxp-playlist-data embeds (Closes #6453)

											
										
										
											2015-08-04 14:44:22 +00:00
+								            playlist_medias = []
 								            for item in vxp_playlist:
 								                media = item.get('media')
 								                if not media:
 								                    continue
 								                playlist_medias.append(media)
 								                # Download single video if found media with asset id matching the video id from URL
 								                if item.get('advert', {}).get('assetId') == playlist_id:
 								                    medias = [media]
 								                    break
 								            # Fallback to the whole playlist
 								            if not medias:
 								                medias = playlist_medias
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								        entries = []
 								        for num, media_meta in enumerate(medias, start=1):
 								            formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
-												[extractor] Add `write_debug` and `get_param`

											
										
										
											2021-05-17 12:23:08 +00:00
+								            if not formats and not self.get_param('ignore_no_formats'):
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                continue
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            video_id = media_meta.get('externalId')
 								            if not video_id:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								            title = media_meta.get('caption')
 								            if not title:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
 								            duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
-												Fix tests, description formatting

											
										
										
											2015-06-25 05:31:32 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								            images = []
 								            for image in media_meta.get('images', {}).values():
 								                images.extend(image.values())
 								            if 'image' in media_meta:
 								                images.append(media_meta['image'])
 								            thumbnails = [{
 								                'url': image.get('href'),
 								                'width': int_or_none(image.get('width')),
 								                'height': int_or_none(image.get('height')),
 								            } for image in images]
 								            entries.append({
 								                'id': video_id,
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								                'title': title,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                'thumbnails': thumbnails,
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								                'duration': duration,
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								                'timestamp': timestamp,
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
+								                'formats': formats,
 								                'subtitles': subtitles,
-												bbc.py: correct syntax

											
										
										
											2015-07-17 07:47:02 +00:00
+								            })
-												toss new stuff into old file

											
										
										
											2015-06-20 13:22:13 +00:00
-												[bbc] Improve playlist extraction, refactor, expand support and document

											
										
										
											2015-07-25 14:21:42 +00:00
+								        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
-												[bbc.co.uk:article] Add new extractor (#7257)

											
										
										
											2015-10-22 13:13:03 +00:00
 								class BBCCoUkArticleIE(InfoExtractor):
 								    """Playlist extractor for bbc.co.uk programme article pages.

 								    The article page is scanned for embedded ``Clip`` resources and each
 								    one is returned as a URL result inside a playlist.
 								    """

 								    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
 								    IE_NAME = 'bbc.co.uk:article'
 								    IE_DESC = 'BBC articles'

 								    _TEST = {
 								        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
 								        'info_dict': {
 								            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
 								            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
 								            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
 								        },
 								        'playlist_count': 4,
 								        'add_ie': ['BBCCoUk'],
 								    }

 								    def _real_extract(self, url):
 								        """Download the article page and return its clips as a playlist."""
 								        playlist_id = self._match_id(url)
 								        webpage = self._download_webpage(url, playlist_id)

 								        title = self._og_search_title(webpage)
 								        # The og:description lookup is non-fatal and may yield None, so a
 								        # bare `.strip()` would raise AttributeError on pages without it.
 								        description = strip_or_none(self._og_search_description(webpage))

 								        clip_urls = re.findall(
 								            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)
 								        entries = [self.url_result(clip_url) for clip_url in clip_urls]

 								        return self.playlist_result(entries, playlist_id, title, description)
-												[bbccouk] Add support for playlists (Closes #9812)

											
										
										
											2016-06-17 16:42:52 +00:00
 								class BBCCoUkPlaylistBaseIE(InfoExtractor):
 								    """Shared logic for paginated bbc.co.uk programme listings.

 								    Subclasses are expected to provide ``_URL_TEMPLATE``,
 								    ``_VIDEO_ID_TEMPLATE`` and ``_extract_title_and_description()``,
 								    all of which are read through ``self`` below.
 								    """

 								    def _entries(self, webpage, url, playlist_id):
 								        """Yield a URL result for every video id found in the listing.

 								        ``webpage`` is the already downloaded first page. When ``url``
 								        carries an explicit ``page`` query parameter only that page is
 								        processed; otherwise the "next" pagination link is followed until
 								        none is left.
 								        """
 								        single_page = 'page' in urllib.parse.parse_qs(
 								            urllib.parse.urlparse(url).query)

 								        # Numbering starts at 2 because the first page was fetched by the caller
 								        for page_num in itertools.count(2):
 								            for video_id in re.findall(
 								                    self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
 								                yield self.url_result(
 								                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())

 								            if single_page:
 								                return

 								            next_page = self._search_regex(
 								                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
 								                webpage, 'next page url', default=None, group='url')
 								            if not next_page:
 								                break

 								            # Pass note/errnote by keyword: the fourth positional parameter
 								            # of _download_webpage is `errnote`, so the bare page number
 								            # previously ended up as the error message prefix.
 								            webpage = self._download_webpage(
 								                urllib.parse.urljoin(url, next_page), playlist_id,
 								                note=f'Downloading page {page_num}',
 								                errnote=f'Unable to download page {page_num}')

 								    def _real_extract(self, url):
 								        """Return a playlist whose entries are produced lazily by ``_entries``."""
 								        playlist_id = self._match_id(url)
 								        webpage = self._download_webpage(url, playlist_id)

 								        title, description = self._extract_title_and_description(webpage)

 								        return self.playlist_result(
 								            self._entries(webpage, url, playlist_id),
 								            playlist_id, title, description)
-												[bbccouk] Add support for playlists (Closes #9812)

											
										
										
											2016-06-17 16:42:52 +00:00
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
 								    @staticmethod
 								    def _get_default(episode, key, default_key='default'):
 								        return try_get(episode, lambda x: x[key][default_key])
 								    def _get_description(self, data):
 								        synopsis = data.get(self._DESCRIPTION_KEY) or {}
 								        return dict_get(synopsis, ('large', 'medium', 'small'))
 								    def _fetch_page(self, programme_id, per_page, series_id, page):
 								        elements = self._get_elements(self._call_api(
 								            programme_id, per_page, page + 1, series_id))
 								        for element in elements:
 								            episode = self._get_episode(element)
 								            episode_id = episode.get('id')
 								            if not episode_id:
 								                continue
 								            thumbnail = None
 								            image = self._get_episode_image(episode)
 								            if image:
 								                thumbnail = image.replace('{recipe}', 'raw')
 								            category = self._get_default(episode, 'labels', 'category')
 								            yield {
 								                '_type': 'url',
 								                'id': episode_id,
 								                'title': self._get_episode_field(episode, 'subtitle'),
 								                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
 								                'thumbnail': thumbnail,
 								                'description': self._get_description(episode),
 								                'categories': [category] if category else None,
 								                'series': self._get_episode_field(episode, 'title'),
 								                'ie_key': BBCCoUkIE.ie_key(),
 								            }
 								    def _real_extract(self, url):
 								        pid = self._match_id(url)
-												[utils] Add `parse_qs`

											
										
										
											2021-08-22 19:02:00 +00:00
+								        qs = parse_qs(url)
-												Update to ytdl-commit-8562218

[ard] improve clip id extraction
https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf

											
										
										
											2021-03-24 22:32:37 +00:00
+								        series_id = qs.get('seriesId', [None])[0]
 								        page = qs.get('page', [None])[0]
 								        per_page = 36 if page else self._PAGE_SIZE
 								        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
 								        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
 								        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
 								        return self.playlist_result(
 								            entries, pid, self._get_playlist_title(playlist_data),
 								            self._get_description(playlist_data))
 								class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
 								    """iPlayer ``/episodes/`` listings, backed by the graph.ibl.api.bbc.co.uk endpoint."""

 								    IE_NAME = 'bbc.co.uk:iplayer:episodes'
 								    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
 								    _TESTS = [{
 								        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
 								        'info_dict': {
 								            'id': 'b05rcz9v',
 								            'title': 'The Disappearance',
 								            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
 								        },
 								        'playlist_mincount': 8,
 								    }, {
 								        # all seasons
 								        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
 								        'info_dict': {
 								            'id': 'b094m5t9',
 								            'title': 'Doctor Foster',
 								            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
 								        },
 								        'playlist_mincount': 10,
 								    }, {
 								        # explicit season
 								        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
 								        'info_dict': {
 								            'id': 'b094m5t9',
 								            'title': 'Doctor Foster',
 								            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
 								        },
 								        'playlist_mincount': 5,
 								    }, {
 								        # all pages
 								        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
 								        'info_dict': {
 								            'id': 'm0004c4v',
 								            'title': 'Beechgrove',
 								            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
 								        },
 								        'playlist_mincount': 37,
 								    }, {
 								        # explicit page
 								        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
 								        'info_dict': {
 								            'id': 'm0004c4v',
 								            'title': 'Beechgrove',
 								            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
 								        },
 								        'playlist_mincount': 1,
 								    }]

 								    _PAGE_SIZE = 100
 								    _DESCRIPTION_KEY = 'synopsis'

 								    def _get_episode_image(self, episode):
 								        """Return the ``default`` variant of the episode's ``image``."""
 								        return self._get_default(episode, 'image')

 								    def _get_episode_field(self, episode, field):
 								        """Return the ``default`` variant of an arbitrary episode field."""
 								        return self._get_default(episode, field)

 								    @staticmethod
 								    def _get_elements(data):
 								        """Return the result list nested under ``entities`` in the API data."""
 								        entities = data['entities']
 								        return entities['results']

 								    @staticmethod
 								    def _get_episode(element):
 								        """Return the element's ``episode`` mapping, or an empty dict if absent."""
 								        episode = element.get('episode')
 								        return episode if episode else {}

 								    def _call_api(self, pid, per_page, page=1, series_id=None):
 								        """POST the query for ``pid`` and return the ``programme`` payload.

 								        ``series_id``, when given, is sent as the ``sliceId`` variable.
 								        """
 								        variables = {
 								            'id': pid,
 								            'page': page,
 								            'perPage': per_page,
 								            **({'sliceId': series_id} if series_id else {}),
 								        }
 								        payload = json.dumps({
 								            # NOTE(review): presumably the identifier of a stored query on
 								            # the API side - confirm
 								            'id': '5692d93d5aac8d796a0305e895e61551',
 								            'variables': variables,
 								        }).encode()
 								        response = self._download_json(
 								            'https://graph.ibl.api.bbc.co.uk/', pid,
 								            headers={'Content-Type': 'application/json'},
 								            data=payload)
 								        return response['data']['programme']

 								    @staticmethod
 								    def _get_playlist_data(data):
 								        """The programme payload itself carries the playlist metadata."""
 								        return data

 								    def _get_playlist_title(self, data):
 								        """Return the ``default`` variant of the programme title."""
 								        return self._get_default(data, 'title')
 								class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
 								    """iPlayer ``/group/`` listings, backed by the ibl.api.bbc.co.uk groups endpoint."""

 								    IE_NAME = 'bbc.co.uk:iplayer:group'
 								    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
 								    _TESTS = [{
 								        # Available for over a year unlike 30 days for most other programmes
 								        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
 								        'info_dict': {
 								            'id': 'p02tcc32',
 								            'title': 'Bohemian Icons',
 								            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
 								        },
 								        'playlist_mincount': 10,
 								    }, {
 								        # all pages
 								        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
 								        'info_dict': {
 								            'id': 'p081d7j7',
 								            'title': 'Music in Scotland',
 								            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
 								        },
 								        'playlist_mincount': 47,
 								    }, {
 								        # explicit page
 								        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
 								        'info_dict': {
 								            'id': 'p081d7j7',
 								            'title': 'Music in Scotland',
 								            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
 								        },
 								        'playlist_mincount': 11,
 								    }]

 								    _PAGE_SIZE = 200
 								    _DESCRIPTION_KEY = 'synopses'

 								    def _get_episode_image(self, episode):
 								        """Return the ``standard`` variant from the episode's ``images``."""
 								        return self._get_default(episode, 'images', 'standard')

 								    def _get_episode_field(self, episode, field):
 								        """Episode fields are plain values here, so read them directly."""
 								        return episode.get(field)

 								    @staticmethod
 								    def _get_elements(data):
 								        """Return the ``elements`` list of the API data."""
 								        return data['elements']

 								    @staticmethod
 								    def _get_episode(element):
 								        """Each element already is the episode mapping."""
 								        return element

 								    def _call_api(self, pid, per_page, page=1, series_id=None):
 								        """Fetch one page of the group's episodes and return ``group_episodes``.

 								        ``series_id`` is accepted for signature parity with the sibling
 								        extractor and is not used by this request.
 								        """
 								        api_url = f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes'
 								        query = {
 								            'page': page,
 								            'per_page': per_page,
 								        }
 								        response = self._download_json(api_url, pid, query=query)
 								        return response['group_episodes']

 								    @staticmethod
 								    def _get_playlist_data(data):
 								        """Playlist metadata lives under the ``group`` key."""
 								        return data['group']

 								    def _get_playlist_title(self, data):
 								        """Return the group's plain ``title`` value, if any."""
 								        return data.get('title')
-												[bbccouk] Add support for playlists (Closes #9812)

											
										
										
											2016-06-17 16:42:52 +00:00
 								class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
 								    """Episodes, broadcasts and clips listings of a bbc.co.uk programme."""

 								    IE_NAME = 'bbc.co.uk:playlist'
 								    _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
 								    # Templates read by the base class: the URL built for every found id,
 								    # and the regex (with the id pattern substituted) that locates the ids
 								    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
 								    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
 								    _TESTS = [{
 								        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
 								        'info_dict': {
 								            'id': 'b05rcz9v',
 								            'title': 'The Disappearance - Clips - BBC Four',
 								            'description': 'French thriller serial about a missing teenager.',
 								        },
 								        'playlist_mincount': 7,
 								    }, {
 								        # multipage playlist, explicit page
 								        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
 								        'info_dict': {
 								            'id': 'b00mfl7n',
 								            'title': 'Frozen Planet - Clips - BBC One',
 								            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
 								        },
 								        'playlist_mincount': 24,
 								    }, {
 								        # multipage playlist, all pages
 								        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
 								        'info_dict': {
 								            'id': 'b00mfl7n',
 								            'title': 'Frozen Planet - Clips - BBC One',
 								            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
 								        },
 								        'playlist_mincount': 142,
 								    }, {
 								        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
 								        'only_matching': True,
 								    }, {
 								        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
 								        'only_matching': True,
 								    }, {
 								        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
 								        'only_matching': True,
 								    }]

 								    def _extract_title_and_description(self, webpage):
 								        """Return ``(title, description)`` read from the page's Open Graph tags."""
 								        return (
 								            self._og_search_title(webpage, fatal=False),
 								            self._og_search_description(webpage),
 								        )