0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-12-04 04:07:26 +00:00

[brightcove] add support for brightcove in-page embed (fixes #6824)

This commit is contained in:
remitamine 2015-09-11 04:46:21 +01:00
parent 689fb748ee
commit ed1269000f
3 changed files with 116 additions and 2 deletions

View file

@ -59,7 +59,10 @@
from .bpb import BpbIE from .bpb import BpbIE
from .br import BRIE from .br import BRIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import BrightcoveIE from .brightcove import (
BrightcoveIE,
BrightcoveInPageEmbedIE,
)
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE
from .c56 import C56IE from .c56 import C56IE

View file

@ -22,6 +22,10 @@
fix_xml_ampersands, fix_xml_ampersands,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
js_to_json,
int_or_none,
parse_iso8601,
extract_attributes,
) )
@ -346,3 +350,91 @@ def _extract_video_info(self, video_info):
if 'url' not in info and not info.get('formats'): if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info
class BrightcoveInPageEmbedIE(InfoExtractor):
    """Extractor for Brightcove "in page embed" players (players.brightcove.net).

    The player's ``index.min.js`` is downloaded to read the policy key passed
    to its ``catalog(...)`` call.  That key is then sent in the ``Accept``
    header of a request to the edge.api.brightcove.com playback endpoint,
    whose JSON response supplies the metadata and the media sources.
    """

    # Groups, in order: account_id, player_id, embed, video_id.
    # The "." and "?" of "index.html?" are escaped so they match literally
    # (unescaped, "?" merely made the trailing "l" optional).
    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[a-z0-9-]+)_(?P<embed>[a-z]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
    # Named ``_TEST`` (not ``TEST``) so the extractor test suite picks it up.
    _TEST = {
        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
        'info_dict': {
            'id': '4463358922001',
            'ext': 'flv',
            'title': 'Meet the man behind Popcorn Time',
            'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
            'duration': 165768,
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return a players.brightcove.net URL for the first embed in *webpage*.

        Looks for the first ``<video ...>...</video>`` (or ``</audio>``) element
        and reads its ``data-account``, ``data-player``, ``data-embed`` and
        ``data-video-id`` attributes.  Returns ``None`` when no such element
        exists or any of those four attributes is missing or empty.
        """
        video_tag = re.search(r'(?s)<video([^>]*)>.*?</(?:video|audio)>', webpage)
        if video_tag:
            video_attributes = extract_attributes(video_tag.group(), r'(?s)\s*data-(account|video-id|playlist-id|policy-key|player|embed)\s*=\s*["\']([^"\']+)["\']')
            account_id = video_attributes.get('account')
            player_id = video_attributes.get('player')
            embed = video_attributes.get('embed')
            video_id = video_attributes.get('video-id')
            if account_id and player_id and embed and video_id:
                return 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (account_id, player_id, embed, video_id)
        return None

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ``KeyError`` if the player catalog has no ``policyKey`` or the
        playback response has no ``name``.
        """
        mobj = re.match(self._VALID_URL, url)
        account_id, player_id, embed, video_id = mobj.groups()

        # The policy key is embedded in the player's minified JavaScript as
        # the argument of a ``catalog({...});`` call.
        player_js = self._download_webpage(
            'http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed),
            video_id)
        catalog = self._parse_json(
            js_to_json(self._search_regex(
                r'catalog\(({[^}]+})\);', player_js, 'catalog')),
            video_id)
        policy_key = catalog['policyKey']

        # The playback endpoint takes the policy key via the Accept header.
        req = compat_urllib_request.Request(
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        json_data = self._download_json(req, video_id)

        title = json_data['name']
        description = json_data.get('description')
        # Previously read from 'name', which duplicated the title.
        thumbnail = json_data.get('thumbnail')
        timestamp = parse_iso8601(json_data.get('published_at'))
        # NOTE(review): the bundled test expects 165768 for a short clip, so
        # this value is presumably in milliseconds - confirm before scaling.
        duration = int_or_none(json_data.get('duration'))

        formats = []
        # 'sources' may be absent or null; treat that as "no sources".
        for source in json_data.get('sources') or []:
            src = source.get('src')
            if source.get('type') == 'application/x-mpegURL':
                if src:
                    formats.extend(self._extract_m3u8_formats(src, video_id))
            elif src:
                formats.append({
                    'url': src,
                    # NOTE(review): avg_bitrate looks like an overall bitrate
                    # rather than an audio one - confirm unit and target key.
                    'abr': source.get('avg_bitrate'),
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                    'filesize': source.get('size'),
                    'container': source.get('container'),
                    # Previously copied from 'container' by mistake.
                    'vcodec': source.get('codec'),
                })
            else:
                # Sources without a direct 'src' may still offer a manifest.
                streaming_src = source.get('streaming_src')
                if streaming_src:
                    formats.extend(self._extract_f4m_formats(streaming_src, video_id))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }

View file

@ -29,7 +29,10 @@
url_basename, url_basename,
xpath_text, xpath_text,
) )
from .brightcove import BrightcoveIE from .brightcove import (
BrightcoveIE,
BrightcoveInPageEmbedIE,
)
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
@ -1012,6 +1015,17 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'cinemasnob', 'title': 'cinemasnob',
}, },
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'info_dict': {
'id': '4238694884001',
'ext': 'flv',
'title': 'Tabletop: Dread, Last Thoughts',
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
} }
] ]
@ -1288,6 +1302,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
'entries': entries, 'entries': entries,
} }
# Look for Brightcove In Page Embed:
brightcove_in_page_embed_url = BrightcoveInPageEmbedIE._extract_url(webpage)
if brightcove_in_page_embed_url:
return self.url_result(brightcove_in_page_embed_url, 'BrightcoveInPageEmbed')
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',