0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-26 02:55:17 +00:00

Updated to release 2020.11.21.1

This commit is contained in:
pukkandan 2020-11-21 20:20:42 +05:30
parent 3462ffa892
commit a0566bbf5c
29 changed files with 559 additions and 360 deletions

View file

@ -37,7 +37,7 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false, "listsubtitles": false,
"socket_timeout": 20, "socket_timeout": 20,
"fixup": "never" "fixup": "never"
} }

View file

@ -919,6 +919,76 @@ def _real_extract(self, url):
self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx') self.assertEqual(downloaded['extractor_key'], 'TestEx')
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
    # A playlist whose entries come from a generator of 'url_transparent'
    # results is processed with 'ignoreerrors' enabled and the 'extra'
    # format requested:
    #   * video 0 raises ExtractorError during extraction,
    #   * video 1 only offers the 'default' format,
    #   * video 2 additionally offers the requested 'extra' format.
    # The assertions below expect entries 0 and 1 to be None and only
    # video 2 to be recorded as downloaded.

    class _YDL(YDL):
        # trouble() is overridden as a no-op for this test.
        def trouble(self, s, tb=None):
            pass

    class VideoIE(InfoExtractor):
        _VALID_URL = r'video:(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            if video_id == '0':
                raise ExtractorError('foo')
            formats = [{
                'format_id': 'default',
                'url': 'url:',
            }]
            if video_id == '2':
                formats.append({
                    'format_id': 'extra',
                    'url': TEST_URL,
                })
            return {
                'id': video_id,
                'title': 'Video %s' % video_id,
                'formats': formats,
            }

    class PlaylistIE(InfoExtractor):
        _VALID_URL = r'playlist:'

        def _entries(self):
            # Entries are produced lazily on purpose: the playlist result
            # wraps a generator rather than a list.
            for video_id in map(compat_str, range(3)):
                yield {
                    '_type': 'url_transparent',
                    'ie_key': VideoIE.ie_key(),
                    'id': video_id,
                    'url': 'video:%s' % video_id,
                    'title': 'Video Transparent %s' % video_id,
                }

        def _real_extract(self, url):
            return self.playlist_result(self._entries())

    ydl = _YDL({
        'format': 'extra',
        'ignoreerrors': True,
    })
    for ie_class in (VideoIE, PlaylistIE):
        ydl.add_info_extractor(ie_class(ydl))

    entries = ydl.extract_info('playlist:')['entries']
    self.assertEqual(len(entries), 3)
    for failed_entry in entries[:2]:
        self.assertTrue(failed_entry is None)

    self.assertEqual(len(ydl.downloaded_info_dicts), 1)
    downloaded = ydl.downloaded_info_dicts[0]
    self.assertEqual(entries[2], downloaded)

    # Checked in the same order as before; the title and id are the ones
    # supplied by the url_transparent playlist entry.
    expected_fields = (
        ('url', TEST_URL),
        ('title', 'Video Transparent 2'),
        ('id', '2'),
        ('extractor', 'Video'),
        ('extractor_key', 'Video'),
    )
    for field, expected in expected_fields:
        self.assertEqual(downloaded[field], expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -830,34 +830,23 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
'and will probably not work.') 'and will probably not work.')
try: try:
try: temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url) except (AssertionError, IndexError, AttributeError):
except (AssertionError, IndexError, AttributeError): temp_id = None
temp_id = None if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): self.to_screen("[%s] %s: has already been recorded in archive" % (
self.to_screen("[%s] %s: has already been recorded in archive" % ( ie_key, temp_id))
ie_key, temp_id)) break
break
ie_result = ie.extract(url) return self.__extract_info(url, ie, download, extra_info, process, info_dict)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break else:
if isinstance(ie_result, list): self.report_error('no suitable InfoExtractor for URL %s' % url)
# Backwards compatibility: old IE result format
ie_result = { def __handle_extraction_exceptions(func):
'_type': 'compat_list', def wrapper(self, *args, **kwargs):
'entries': ie_result, try:
} return func(self, *args, **kwargs)
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except GeoRestrictedError as e: except GeoRestrictedError as e:
msg = e.msg msg = e.msg
if e.countries: if e.countries:
@ -865,20 +854,38 @@ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_in
map(ISO3166Utils.short2full, e.countries)) map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg) self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached: except MaxDownloadsReached:
raise raise
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else: else:
raise raise
return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else: else:
self.report_error('no suitable InfoExtractor for URL %s' % url) return ie_result
def add_default_extra_info(self, ie_result, ie, url): def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, { self.add_extra_info(ie_result, {
@ -1057,9 +1064,8 @@ def report_download(num_entries):
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
continue continue
entry_result = self.process_ie_result(entry, entry_result = self.__process_iterable_entry(entry, download, extra)
download=download, # TODO: skip failed (empty) entries?
extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist) self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@ -1088,6 +1094,11 @@ def _fixup(r):
else: else:
raise Exception('Invalid result type: %s' % result_type) raise Exception('Invalid result type: %s' % result_type)
# Process a single playlist entry by delegating to process_ie_result().
# The call is routed through __handle_extraction_exceptions so that an
# exception raised while processing one entry is handled by that wrapper
# (reported via report_error, or re-raised, depending on the exception type
# and the 'ignoreerrors' param) instead of propagating directly from the
# playlist loop.
# NOTE(review): when the wrapper reports an error rather than re-raising,
# this presumably evaluates to None for that entry - confirm against the
# decorator.
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
def _build_format_filter(self, filter_spec): def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec " " Returns a function to filter the formats according to the filter_spec "

View file

@ -2345,7 +2345,7 @@ def __init__(self, version, name, value, *args, **kwargs):
# HTMLParseError has been deprecated in Python 3.3 and removed in # HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass

View file

@ -109,7 +109,9 @@ def establish_connection():
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err: except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout): # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err) raise RetryDownload(err)
raise err raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked

View file

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)
class AmaraIE(InfoExtractor):
    # Extractor for amara.org video pages. Metadata and subtitle tracks are
    # read from the Amara JSON API; when the underlying media URL belongs to
    # YouTube or Vimeo the result is handed over to that extractor as a
    # 'url_transparent' result.
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for an Amara video page URL.

        The video id is taken from the URL and looked up through
        https://amara.org/api/videos/<id>/ (JSON). The first entry of the
        API's 'all_urls' list is used as the media URL.
        """
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(
            api_url, video_id, query={'format': 'json'})

        # Both fields are required: a missing key raises here.
        title = meta['title']
        video_url = meta['all_urls'][0]

        # Only languages that are published and expose a subtitles URI are
        # kept; each one is offered in json, srt and vtt. A language without
        # a code is filed under 'en'.
        subtitles = {}
        for lang in (meta.get('languages') or []):
            base_uri = lang.get('subtitles_uri')
            if base_uri and lang.get('published'):
                tracks = subtitles.setdefault(lang.get('code') or 'en', [])
                tracks.extend(
                    {
                        'ext': ext,
                        'url': update_url_query(base_uri, {'format': ext}),
                    }
                    for ext in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # YouTube is tried before Vimeo; the first extractor that accepts
        # the media URL takes over via a url_transparent result. Otherwise
        # the URL is returned as a plain result.
        delegate = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if delegate is not None:
            info.update({
                '_type': 'url_transparent',
                'ie_key': delegate.ie_key(),
            })

        return info

View file

@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
] ]
@classmethod @classmethod
def _build_brighcove_url(cls, object_str): def _build_brightcove_url(cls, object_str):
""" """
Build a Brightcove url from a xml string containing Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object> <object class="BrightcoveExperience">{params}</object>
@ -217,7 +217,7 @@ def find_param(name):
return cls._make_brightcove_url(params) return cls._make_brightcove_url(params)
@classmethod @classmethod
def _build_brighcove_url_from_js(cls, object_js): def _build_brightcove_url_from_js(cls, object_js):
# The layout of JS is as follows: # The layout of JS is as follows:
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
# // build Brightcove <object /> XML # // build Brightcove <object /> XML
@ -272,12 +272,12 @@ def _extract_brightcove_urls(cls, webpage):
).+?>\s*</object>''', ).+?>\s*</object>''',
webpage) webpage)
if matches: if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches: if matches:
return list(filter(None, [ return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc) cls._build_brightcove_url_from_js(custom_bc)
for custom_bc in matches])) for custom_bc in matches]))
return [src for _, src in re.findall( return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]

View file

@ -1664,7 +1664,7 @@ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
# just the media without qualities renditions. # just the media without qualities renditions.
# Fortunately, master playlist can be easily distinguished from media # Fortunately, master playlist can be easily distinguished from media
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
# master playlist tags MUST NOT appear in a media playist and vice versa. # master playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
# media playlist and MUST NOT appear in master playlist thus we can # media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion. # clearly detect media playlist with this criterion.

View file

@ -7,7 +7,7 @@
class DiscoveryNetworksDeIE(DPlayIE): class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
}, { }, {
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -60,7 +60,7 @@ def get_item(type_, preference):
title = get_item('title', preferred_langs) or video_id title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs) description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@ -85,7 +85,7 @@ def get_item(type_, preference):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnmail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View file

@ -36,6 +36,7 @@
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .alura import ( from .alura import (
AluraIE, AluraIE,
AluraCourseIE AluraCourseIE
@ -1507,7 +1508,6 @@
YoutubeIE, YoutubeIE,
YoutubeFavouritesIE, YoutubeFavouritesIE,
YoutubeHistoryIE, YoutubeHistoryIE,
YoutubeLiveIE,
YoutubeTabIE, YoutubeTabIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,

View file

@ -211,7 +211,7 @@ def sign(manifest_url, manifest_id):
'id': video_id, 'id': video_id,
'title': self._live_title(title) if is_live else title, 'title': self._live_title(title) if is_live else title,
'description': clean_html(info.get('synopsis')), 'description': clean_html(info.get('synopsis')),
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')), 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live, 'is_live': is_live,

View file

@ -842,7 +842,7 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# MTVSercices embed # MTVServices embed
{ {
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'md5': 'ca1aef97695ef2c1d6973256a57e5252',

View file

@ -3,11 +3,13 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get,
update_url_query, update_url_query,
) )
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
# video can't be watched anonymously due to view count limit reached, # video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e', 'only_matching': True,
'info_dict': {
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
'ext': 'mp4',
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
}
}, { }, {
# video id is longer than 28 characters # video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
'info_dict': {
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
'ext': 'mp4',
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
'duration': 189,
},
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -171,23 +162,21 @@ def _get_automatic_captions(self, video_id, subtitles_id, hl):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( video_info = compat_parse_qs(self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id) 'https://drive.google.com/get_video_info',
video_id, query={'docid': video_id}))
title = self._search_regex( def get_value(key):
r'"title"\s*,\s*"([^"]+)', webpage, 'title', return try_get(video_info, lambda x: x[key][0])
default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex( reason = get_value('reason')
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', title = get_value('title')
default=None)) if not title and reason:
raise ExtractorError(reason, expected=True)
formats = [] formats = []
fmt_stream_map = self._search_regex( fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, fmt_list = (get_value('fmt_list') or '').split(',')
'fmt stream map', default='').split(',')
fmt_list = self._search_regex(
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
'fmt_list', default='').split(',')
if fmt_stream_map and fmt_list: if fmt_stream_map and fmt_list:
resolutions = {} resolutions = {}
for fmt in fmt_list: for fmt in fmt_list:
@ -257,19 +246,14 @@ def add_source_format(urlh):
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
if not formats: if not formats and reason:
reason = self._search_regex( raise ExtractorError(reason, expected=True)
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason, expected=True)
self._sort_formats(formats) self._sort_formats(formats)
hl = self._search_regex( hl = get_value('hl')
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
subtitles_id = None subtitles_id = None
ttsurl = self._search_regex( ttsurl = get_value('ttsurl')
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
if ttsurl: if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl # the video Id for subtitles will be the last value in the ttsurl
# query string # query string
@ -281,8 +265,8 @@ def add_source_format(urlh):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None), 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
'duration': duration, 'duration': int_or_none(get_value('length_seconds')),
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
'automatic_captions': self.extract_automatic_captions( 'automatic_captions': self.extract_automatic_captions(

View file

@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
def _extract_rtmp_video(self, webpage): def _extract_rtmp_video(self, webpage):
# The server URL is hardcoded # The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/' video_url = 'rtmpe://videof.infoq.com/cfx/st/'
# Extract video URL # Extract video URL
encoded_id = self._search_regex( encoded_id = self._search_regex(
@ -86,17 +86,18 @@ def _extract_http_video(self, webpage):
return [{ return [{
'format_id': 'http_video', 'format_id': 'http_video',
'url': http_video_url, 'url': http_video_url,
'http_headers': {'Referer': 'https://www.infoq.com/'},
}] }]
def _extract_http_audio(self, webpage, video_id): def _extract_http_audio(self, webpage, video_id):
fields = self._hidden_inputs(webpage) fields = self._form_hidden_inputs('mp3Form', webpage)
http_audio_url = fields.get('filename') http_audio_url = fields.get('filename')
if not http_audio_url: if not http_audio_url:
return [] return []
# base URL is found in the Location header in the response returned by # base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in. # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link # audio file seem to be missing some times even if there is a download link

View file

@ -64,7 +64,7 @@ def _real_extract(self, url):
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
description = xpath_text(doc, 'ABSTRACT') description = xpath_text(doc, 'ABSTRACT')
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
formats = [] formats = []
@ -84,5 +84,5 @@ def _real_extract(self, url):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': createtion_time, 'timestamp': creation_time,
} }

View file

@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
}, },
}], }],
}, { }, {
# mutlimedia, not media title # multimedia, not media title
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
'info_dict': { 'info_dict': {
'id': '533198237', 'id': '533198237',

View file

@ -477,7 +477,7 @@ def _extract_webpage(self, url):
if media_id: if media_id:
return media_id, presumptive_id, upload_date, description return media_id, presumptive_id, upload_date, description
# Fronline video embedded via flp # Frontline video embedded via flp
video_id = self._search_regex( video_id = self._search_regex(
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
if video_id: if video_id:

View file

@ -16,8 +16,9 @@
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
parse_duration, parse_duration,
remove_start,
strip_or_none, strip_or_none,
unescapeHTML, try_get,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -67,7 +68,7 @@ def _extract_relinker_info(self, relinker_url, video_id):
# This does not imply geo restriction (e.g. # This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4': if '/video_no_available.mp4' in media_url:
continue continue
ext = determine_ext(media_url) ext = determine_ext(media_url)
@ -122,27 +123,8 @@ def _extract_subtitles(url, subtitle_url):
class RaiPlayIE(RaiBaseIE): class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
'md5': '340aa3b7afb54bfd14a8c11786450d76',
'info_dict': {
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
'ext': 'mp4',
'title': 'La Casa Bianca',
'alt_title': 'S2016 - Puntata del 23/10/2016',
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 3',
'creator': 'Rai 3',
'duration': 3278,
'timestamp': 1477764300,
'upload_date': '20161029',
'series': 'La Casa Bianca',
'season': '2016',
},
'skip': 'This content is not available',
}, {
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': { 'info_dict': {
@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
url, video_id = re.match(self._VALID_URL, url).groups() base, video_id = re.match(self._VALID_URL, url).groups()
media = self._download_json( media = self._download_json(
url.replace('.html', '.json'), video_id, 'Downloading video JSON') base + '.json', video_id, 'Downloading video JSON')
title = media['name'] title = media['name']
video = media['video'] video = media['video']
@ -195,7 +177,8 @@ def _real_extract(self, url):
season = media.get('season') season = media.get('season')
info = { info = {
'id': video_id, 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': self._live_title(title) if relinker_info.get( 'title': self._live_title(title) if relinker_info.get(
'is_live') else title, 'is_live') else title,
'alt_title': strip_or_none(media.get('subtitle')), 'alt_title': strip_or_none(media.get('subtitle')),
@ -217,16 +200,16 @@ def _real_extract(self, url):
return info return info
class RaiPlayLiveIE(RaiBaseIE): class RaiPlayLiveIE(RaiPlayIE):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
_TEST = { _TESTS = [{
'url': 'http://www.raiplay.it/dirette/rainews24', 'url': 'http://www.raiplay.it/dirette/rainews24',
'info_dict': { 'info_dict': {
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
'display_id': 'rainews24', 'display_id': 'rainews24',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:6eca31500550f9376819f174e5644754', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
'uploader': 'Rai News 24', 'uploader': 'Rai News 24',
'creator': 'Rai News 24', 'creator': 'Rai News 24',
'is_live': True, 'is_live': True,
@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} }]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
webpage, 'content id')
return {
'_type': 'url_transparent',
'ie_key': RaiPlayIE.ie_key(),
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
'id': video_id,
'display_id': display_id,
}
class RaiPlayPlaylistIE(InfoExtractor): class RaiPlayPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
'info_dict': { 'info_dict': {
'id': 'nondirloalmiocapo', 'id': 'nondirloalmiocapo',
'title': 'Non dirlo al mio capo', 'title': 'Non dirlo al mio capo',
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) base, playlist_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, playlist_id) program = self._download_json(
base + '.json', playlist_id, 'Downloading program JSON')
title = self._html_search_meta(
('programma', 'nomeProgramma'), webpage, 'title')
description = unescapeHTML(self._html_search_meta(
('description', 'og:description'), webpage, 'description'))
entries = [] entries = []
for mobj in re.finditer( for b in (program.get('blocks') or []):
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', for s in (b.get('sets') or []):
webpage): s_id = s.get('id')
video_url = urljoin(url, mobj.group('path')) if not s_id:
entries.append(self.url_result( continue
video_url, ie=RaiPlayIE.ie_key(), medias = self._download_json(
video_id=RaiPlayIE._match_id(video_url))) '%s/%s.json' % (base, s_id), s_id,
'Downloading content set JSON', fatal=False)
if not medias:
continue
for m in (medias.get('items') or []):
path_id = m.get('path_id')
if not path_id:
continue
video_url = urljoin(url, path_id)
entries.append(self.url_result(
video_url, ie=RaiPlayIE.ie_key(),
video_id=RaiPlayIE._match_id(video_url)))
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(
entries, playlist_id, program.get('name'),
try_get(program, lambda x: x['program_info']['description']))
class RaiIE(RaiBaseIE): class RaiIE(RaiBaseIE):
@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE):
'duration': 2214, 'duration': 2214,
'upload_date': '20161103', 'upload_date': '20161103',
} }
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
'skip': 'This content is not available',
}, { }, {
# initEdizione('ContentItem-...' # initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE):
'upload_date': '20170401', 'upload_date': '20170401',
}, },
'skip': 'Changes daily', 'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
'skip': 'This content is available only in Italy',
}, { }, {
# HLS live stream with ContentItem in og:url # HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
@ -473,7 +423,7 @@ def _real_extract(self, url):
except ExtractorError: except ExtractorError:
pass pass
relinker_url = self._search_regex( relinker_url = self._proto_relative_url(self._search_regex(
r'''(?x) r'''(?x)
(?: (?:
var\s+videoURL| var\s+videoURL|
@ -485,7 +435,7 @@ def _real_extract(self, url):
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''', ''',
webpage, 'relinker URL', group='url') webpage, 'relinker URL', group='url'))
relinker_info = self._extract_relinker_info( relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id) urljoin(url, relinker_url), video_id)

View file

@ -649,7 +649,7 @@ def _real_extract(self, url):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated # https://developers.soundcloud.com/blog/offset-pagination-deprecated
COMMON_QUERY = { COMMON_QUERY = {
'limit': 200, 'limit': 200,

View file

@ -9,6 +9,7 @@
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
unified_timestamp,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
@ -44,7 +45,8 @@ def _extract_video(self, video_info, video_id):
'format_id': player_type, 'format_id': player_type,
'url': vurl, 'url': vurl,
}) })
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): rights = try_get(video_info, lambda x: x['rights'], dict) or {}
if not formats and rights.get('geoBlockedSweden'):
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is only available in Sweden', 'This video is only available in Sweden',
countries=self._GEO_COUNTRIES) countries=self._GEO_COUNTRIES)
@ -70,6 +72,7 @@ def _extract_video(self, video_info, video_id):
episode = video_info.get('episodeTitle') episode = video_info.get('episodeTitle')
episode_number = int_or_none(video_info.get('episodeNumber')) episode_number = int_or_none(video_info.get('episodeNumber'))
timestamp = unified_timestamp(rights.get('validFrom'))
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
age_limit = None age_limit = None
adult = dict_get( adult = dict_get(
@ -84,6 +87,7 @@ def _extract_video(self, video_info, video_id):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': duration, 'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit, 'age_limit': age_limit,
'series': series, 'series': series,
'season_number': season_number, 'season_number': season_number,
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv' IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
svt:(?P<svt_id>[^/?#&]+)| (?:
svt:|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
)
(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'md5': '2382036fd6f8c994856c323fe51c426e',
'info_dict': { 'info_dict': {
'id': '5996901', 'id': 'jNwpV9P',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Flygplan till Haile Selassie', 'title': 'Det här är himlen',
'duration': 3527, 'timestamp': 1586044800,
'thumbnail': r're:^https?://.*[\.-]jpg$', 'upload_date': '20200405',
'duration': 3515,
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
'age_limit': 0, 'age_limit': 0,
'subtitles': { 'subtitles': {
'sv': [{ 'sv': [{
'ext': 'wsrt', 'ext': 'vtt',
}] }]
}, },
}, },
'params': {
'format': 'bestvideo',
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
# init segments that are smaller
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
'skip_download': True,
},
}, { }, {
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
}, { }, {
'url': 'svt:14278044', 'url': 'svt:14278044',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
'only_matching': True,
}, {
'url': 'svt:eWv5MLX',
'only_matching': True,
}] }]
def _adjust_title(self, info): def _adjust_title(self, info):
@ -236,7 +259,10 @@ def _real_extract(self, url):
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
webpage, 'video id') webpage, 'video id')
return self._extract_by_video_id(svt_id, webpage) info_dict = self._extract_by_video_id(svt_id, webpage)
info_dict['thumbnail'] = thumbnail
return info_dict
class SVTSeriesIE(SVTPlayBaseIE): class SVTSeriesIE(SVTPlayBaseIE):
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = re.match(self._VALID_URL, url).groups()

View file

@ -86,7 +86,7 @@ def _real_extract(self, url):
# return self._extract_via_api(kind, video_id) # return self._extract_via_api(kind, video_id)
# JSON api does not provide some audio formats (e.g. ogg) thus # JSON api does not provide some audio formats (e.g. ogg) thus
# extractiong audio via webpage # extracting audio via webpage
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

View file

@ -208,7 +208,7 @@ def _extract_urls(cls, webpage):
if m: if m:
return [m.group('url')] return [m.group('url')]
# Are whitesapces ignored in URLs? # Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044 # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall( matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)

View file

@ -56,9 +56,9 @@ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
content_id = xpath_text(video_data, 'contentId') or video_id content_id = xpath_text(video_data, 'contentId') or video_id
# rtmp_src = xpath_text(video_data, 'akamai/src') # rtmp_src = xpath_text(video_data, 'akamai/src')
# if rtmp_src: # if rtmp_src:
# splited_rtmp_src = rtmp_src.split(',') # split_rtmp_src = rtmp_src.split(',')
# if len(splited_rtmp_src) == 2: # if len(split_rtmp_src) == 2:
# rtmp_src = splited_rtmp_src[1] # rtmp_src = split_rtmp_src[1]
# aifp = xpath_text(video_data, 'akamai/aifp', default='') # aifp = xpath_text(video_data, 'akamai/aifp', default='')
urls = [] urls = []

View file

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import hashlib import hashlib
import hmac import hmac
import itertools import itertools
@ -9,6 +10,10 @@
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'md5': '94e0e34fd58f169f40c184f232356cfe',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boys Over Flowers - Episode 1', 'title': 'Boys Over Flowers - Episode 1',
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
'duration': 4204, 'duration': 4172,
'timestamp': 1270496524, 'timestamp': 1270496524,
'upload_date': '20100405', 'upload_date': '20100405',
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# youtube external # youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE):
'uploader_id': 'ad14065n', 'uploader_id': 'ad14065n',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'skip': 'Page not found!',
}, { }, {
'url': 'http://www.viki.com/player/44699v', 'url': 'http://www.viki.com/player/44699v',
'only_matching': True, 'only_matching': True,
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._call_api( resp = self._download_json(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON') 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON',
streams = self._call_api( headers={'x-viki-app-ver': '4.0.57'})
'videos/%s/streams.json' % video_id, video_id, video = resp['video']
'Downloading video streams JSON')
formats = []
for format_id, stream_dict in streams.items():
height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
# rtmps URLs does not seem to work
if protocol == 'rtmps':
continue
format_url = format_dict.get('url')
format_drms = format_dict.get('drms')
format_stream_id = format_dict.get('id')
if format_id == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
for f in m3u8_formats:
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None
formats.extend(m3u8_formats)
elif format_id == 'mpd':
mpd_formats = self._extract_mpd_formats(
format_url, video_id,
mpd_id='mpd-%s' % protocol, fatal=False)
formats.extend(mpd_formats)
elif format_id == 'mpd':
formats.extend(mpd_formats)
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
continue
formats.append({
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
'drms': format_drms,
'stream_id': format_stream_id,
})
else:
urlh = self._request_webpage(
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': height,
'drms': format_drms,
'stream_id': format_stream_id,
'filesize': int_or_none(urlh.headers.get('Content-Length')),
})
self._sort_formats(formats)
self._check_errors(video) self._check_errors(video)
@ -342,12 +289,84 @@ def _real_extract(self, url):
'subtitles': subtitles, 'subtitles': subtitles,
} }
if 'external' in streams: formats = []
result.update({
'_type': 'url_transparent', def add_format(format_id, format_dict, protocol='http'):
'url': streams['external']['url'], # rtmps URLs does not seem to work
}) if protocol == 'rtmps':
return result return
format_url = format_dict.get('url')
if not format_url:
return
format_drms = format_dict.get('drms')
format_stream_id = format_dict.get('id')
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
stream = qs.get('stream', [None])[0]
if stream:
format_url = base64.b64decode(stream).decode()
if format_id in ('m3u8', 'hls'):
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio
for f in m3u8_formats:
if '_drm/index_' in f['url']:
continue
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None
formats.append(f)
elif format_id in ('mpd', 'dash'):
formats.extend(self._extract_mpd_formats(
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
return
formats.append({
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
'drms': format_drms,
'stream_id': format_stream_id,
})
else:
urlh = self._request_webpage(
HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)),
'drms': format_drms,
'stream_id': format_stream_id,
'filesize': int_or_none(urlh.headers.get('Content-Length')),
})
for format_id, format_dict in (resp.get('streams') or {}).items():
add_format(format_id, format_dict)
if not formats:
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams:
result.update({
'_type': 'url_transparent',
'url': streams['external']['url'],
})
return result
for format_id, stream_dict in streams.items():
for protocol, format_dict in stream_dict.items():
add_format(format_id, format_dict, protocol)
self._sort_formats(formats)
result['formats'] = formats result['formats'] = formats
return result return result

View file

@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
}] }]
_PAGE_SIZE = 100 _PAGE_SIZE = 100
def _fetch_page(self, album_id, authorizaion, hashed_pass, page): def _fetch_page(self, album_id, authorization, hashed_pass, page):
api_page = page + 1 api_page = page + 1
query = { query = {
'fields': 'link,uri', 'fields': 'link,uri',
@ -934,7 +934,7 @@ def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
videos = self._download_json( videos = self._download_json(
'https://api.vimeo.com/albums/%s/videos' % album_id, 'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={ album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorizaion, 'Authorization': 'jwt ' + authorization,
})['data'] })['data']
for video in videos: for video in videos:
link = video.get('link') link = video.get('link')

View file

@ -54,17 +54,17 @@ def _extract_tracks(self, item_id, referer, typ=None):
def _decrypt(origin): def _decrypt(origin):
n = int(origin[0]) n = int(origin[0])
origin = origin[1:] origin = origin[1:]
short_lenth = len(origin) // n short_length = len(origin) // n
long_num = len(origin) - short_lenth * n long_num = len(origin) - short_length * n
l = tuple() l = tuple()
for i in range(0, n): for i in range(0, n):
length = short_lenth length = short_length
if i < long_num: if i < long_num:
length += 1 length += 1
l += (origin[0:length], ) l += (origin[0:length], )
origin = origin[length:] origin = origin[length:]
ans = '' ans = ''
for i in range(0, short_lenth + 1): for i in range(0, short_length + 1):
for j in range(0, n): for j in range(0, n):
if len(l[j]) > i: if len(l[j]) > i:
ans += l[j][i] ans += l[j][i]

View file

@ -306,6 +306,8 @@ def _real_initialize(self):
}, },
} }
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
def _call_api(self, ep, query, video_id): def _call_api(self, ep, query, video_id):
data = self._DEFAULT_API_DATA.copy() data = self._DEFAULT_API_DATA.copy()
data.update(query) data.update(query)
@ -322,8 +324,8 @@ def _call_api(self, ep, query, video_id):
def _extract_yt_initial_data(self, video_id, webpage): def _extract_yt_initial_data(self, video_id, webpage):
return self._parse_json( return self._parse_json(
self._search_regex( self._search_regex(
r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;', (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
webpage, 'yt initial data'), self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
video_id) video_id)
@ -1089,6 +1091,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
'info_dict': {
'id': 'CHqg6qOn4no',
'ext': 'mp4',
'title': 'Part 77 Sort a list of simple types in c#',
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
'upload_date': '20130831',
'uploader_id': 'kudvenkat',
'uploader': 'kudvenkat',
},
'params': {
'skip_download': True,
},
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -2138,6 +2156,21 @@ def _extract_filesize(media_url):
formats.append(a_format) formats.append(a_format)
else: else:
error_message = extract_unavailable_message() error_message = extract_unavailable_message()
if not error_message:
reason_list = try_get(
player_response,
lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
list) or []
for reason in reason_list:
if not isinstance(reason, dict):
continue
reason_text = try_get(reason, lambda x: x['text'], compat_str)
if reason_text:
if not error_message:
error_message = ''
error_message += reason_text
if error_message:
error_message = clean_html(error_message)
if not error_message: if not error_message:
error_message = clean_html(try_get( error_message = clean_html(try_get(
player_response, lambda x: x['playabilityStatus']['reason'], player_response, lambda x: x['playabilityStatus']['reason'],
@ -2319,8 +2352,8 @@ def extract_meta(field):
def _extract_count(count_name): def _extract_count(count_name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
% re.escape(count_name), r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
video_webpage, count_name, default=None)) video_webpage, count_name, default=None))
like_count = _extract_count('like') like_count = _extract_count('like')
@ -2613,13 +2646,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, },
'playlist_mincount': 138, 'playlist_mincount': 138,
}, { }, {
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA', 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA', 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
@ -2666,7 +2699,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU', 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'only_matching': True, 'only_matching': True,
}, { }, {
# Playlist URL that does not actually serve a playlist # Playlist URL that does not actually serve a playlist
@ -2698,14 +2731,59 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True, 'only_matching': True,
}] }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
@classmethod 'info_dict': {
def suitable(cls, url): 'id': '9Auq9mYxFEE',
IGNORE = (YoutubeLiveIE,) 'ext': 'mp4',
return ( 'title': 'Watch Sky News live',
False if any(ie.suitable(url) for ie in IGNORE) 'uploader': 'Sky News',
else super(YoutubeTabIE, cls).suitable(url)) 'uploader_id': 'skynews',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
'upload_date': '20191102',
'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
'categories': ['News & Politics'],
'tags': list,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
'id': 'a48o2S1cPoo',
'ext': 'mp4',
'title': 'The Young Turks - Live Main Show',
'uploader': 'The Young Turks',
'uploader_id': 'TheYoungTurks',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
'only_matching': True,
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
},
# TODO
# {
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
# 'only_matching': True,
# }
]
def _extract_channel_id(self, webpage): def _extract_channel_id(self, webpage):
channel_id = self._html_search_meta( channel_id = self._html_search_meta(
@ -3147,7 +3225,7 @@ def _real_extract(self, url):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, item_id) webpage = self._download_webpage(url, item_id)
identity_token = self._search_regex( identity_token = self._search_regex(
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, r'\bID_TOKEN["\']\s*:\s/l*["\'](.+?)["\']', webpage,
'identity token', default=None) 'identity token', default=None)
data = self._extract_yt_initial_data(item_id, webpage) data = self._extract_yt_initial_data(item_id, webpage)
tabs = try_get( tabs = try_get(
@ -3158,7 +3236,11 @@ def _real_extract(self, url):
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist: if playlist:
return self._extract_from_playlist(item_id, data, playlist) return self._extract_from_playlist(item_id, data, playlist)
# Fallback to video extraction if no playlist alike page is recognized # Fallback to video extraction if no playlist alike page is recognized.
# First check for the current video then try the v attribute of URL query.
video_id = try_get(
data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
compat_str) or video_id
if video_id: if video_id:
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
# Failed to recognize # Failed to recognize
@ -3279,58 +3361,6 @@ def _real_extract(self, url):
ie=YoutubeTabIE.ie_key(), video_id=user_id) ie=YoutubeTabIE.ie_key(), video_id=user_id)
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
_VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
IE_NAME = 'youtube:live'
_TESTS = [{
'url': 'https://www.youtube.com/user/TheYoungTurks/live',
'info_dict': {
'id': 'a48o2S1cPoo',
'ext': 'mp4',
'title': 'The Young Turks - Live Main Show',
'uploader': 'The Young Turks',
'uploader_id': 'TheYoungTurks',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
'upload_date': '20150715',
'license': 'Standard YouTube License',
'description': 'md5:438179573adcdff3c97ebb1ee632b891',
'categories': ['News & Politics'],
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
'only_matching': True,
}, {
'url': 'https://www.youtube.com/TheYoungTurks/live',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
base_url = mobj.group('base_url')
webpage = self._download_webpage(url, channel_id, fatal=False)
if webpage:
page_type = self._og_search_property(
'type', webpage, 'page type', default='')
video_id = self._html_search_meta(
'videoId', webpage, 'video id', default=None)
if page_type.startswith('video') and video_id and re.match(
r'^[0-9A-Za-z_-]{11}$', video_id):
return self.url_result(video_id, YoutubeIE.ie_key())
return self.url_result(base_url)
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for # there doesn't appear to be a real limit, for example if you search for

View file

@ -2460,7 +2460,7 @@ def __init__(self, code=None, msg='Unknown error'):
# Parsing code and msg # Parsing code and msg
if (self.code in (errno.ENOSPC, errno.EDQUOT) if (self.code in (errno.ENOSPC, errno.EDQUOT)
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
self.reason = 'NO_SPACE' self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG' self.reason = 'VALUE_TOO_LONG'
@ -4215,10 +4215,10 @@ def parse_codecs(codecs_str):
# http://tools.ietf.org/html/rfc6381 # http://tools.ietf.org/html/rfc6381
if not codecs_str: if not codecs_str:
return {} return {}
splited_codecs = list(filter(None, map( split_codecs = list(filter(None, map(
lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
vcodec, acodec = None, None vcodec, acodec = None, None
for full_codec in splited_codecs: for full_codec in split_codecs:
codec = full_codec.split('.')[0] codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
if not vcodec: if not vcodec:
@ -4229,10 +4229,10 @@ def parse_codecs(codecs_str):
else: else:
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
if not vcodec and not acodec: if not vcodec and not acodec:
if len(splited_codecs) == 2: if len(split_codecs) == 2:
return { return {
'vcodec': splited_codecs[0], 'vcodec': split_codecs[0],
'acodec': splited_codecs[1], 'acodec': split_codecs[1],
} }
else: else:
return { return {
@ -5471,7 +5471,7 @@ def encode_base_n(num, n, table=None):
def decode_packed_codes(code): def decode_packed_codes(code):
mobj = re.search(PACKED_CODES_RE, code) mobj = re.search(PACKED_CODES_RE, code)
obfucasted_code, base, count, symbols = mobj.groups() obfuscated_code, base, count, symbols = mobj.groups()
base = int(base) base = int(base)
count = int(count) count = int(count)
symbols = symbols.split('|') symbols = symbols.split('|')
@ -5484,7 +5484,7 @@ def decode_packed_codes(code):
return re.sub( return re.sub(
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
obfucasted_code) obfuscated_code)
def caesar(s, alphabet, shift): def caesar(s, alphabet, shift):