From f5a9e9df0da38a0c3c13f1dd106d5eb585253f0c Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 17 Nov 2022 19:11:35 +0000 Subject: [PATCH] [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction (#5558) * Move Brightcove embed extraction and tests into the IEs * Split `BrightcoveNewBaseIE` from `BrightcoveNewIE` * Fix bug in ade1fa70cbaaaadaa4772e5f0564870cea3167ef with the "wrong" spelling of `referrer` being smuggled Closes #5539 --- yt_dlp/extractor/bandaichannel.py | 4 +- yt_dlp/extractor/brightcove.py | 529 ++++++++++++++++++++++-------- yt_dlp/extractor/generic.py | 270 +-------------- yt_dlp/extractor/sevenplus.py | 4 +- 4 files changed, 403 insertions(+), 404 deletions(-) diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index e438d16ea..d7fcf44bd 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,8 +1,8 @@ -from .brightcove import BrightcoveNewIE +from .brightcove import BrightcoveNewBaseIE from ..utils import extract_attributes -class BandaiChannelIE(BrightcoveNewIE): # XXX: Do not subclass from concrete IE +class BandaiChannelIE(BrightcoveNewBaseIE): IE_NAME = 'bandaichannel' _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)' _TESTS = [{ diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 35e1aa9c9..2b7ddcae8 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -145,6 +145,159 @@ class BrightcoveLegacyIE(InfoExtractor): } ] + _WEBPAGE_TESTS = [{ + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests + 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', + 'info_dict': { + 'id': '2765128793001', + 'ext': 'mp4', + 'title': 'Le cours de bourse : l’analyse technique', + 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', + 
'uploader': 'BFM BUSINESS', + }, + 'params': { + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Redirects, page gone', + }, { + # https://github.com/ytdl-org/youtube-dl/issues/2253 + 'url': 'http://bcove.me/i6nfkrc3', + 'md5': '0ba9446db037002366bab3b3eb30c88c', + 'info_dict': { + 'id': '3101154703001', + 'ext': 'mp4', + 'title': 'Still no power', + 'uploader': 'thestar.com', + 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', + }, + 'skip': 'video gone', + }, { + # https://github.com/ytdl-org/youtube-dl/issues/3541 + 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', + 'info_dict': { + 'id': '3866516442001', + 'ext': 'mp4', + 'title': 'Leer mij vrouwen kennen: Aflevering 1', + 'description': 'Leer mij vrouwen kennen: Aflevering 1', + 'uploader': 'SBS Broadcasting', + }, + 'skip': 'Restricted to Netherlands, 404 Not Found', + 'params': { + 'skip_download': True, # m3u8 download + }, + }, { + # Brightcove video in