From 19dbaeece321c51fa336ef142507adf440e22e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 07:03:08 +0600 Subject: [PATCH] Remove _sort_formats from _extract_*_formats methods Now _sort_formats should be called explicitly. _sort_formats has been added to all the necessary places in code. Closes #8051 --- youtube_dl/extractor/abc7news.py | 1 + youtube_dl/extractor/azubu.py | 1 + youtube_dl/extractor/bet.py | 1 + youtube_dl/extractor/cbsnews.py | 1 + youtube_dl/extractor/chaturbate.py | 1 + youtube_dl/extractor/common.py | 6 ------ youtube_dl/extractor/cwtv.py | 1 + youtube_dl/extractor/dfb.py | 1 + youtube_dl/extractor/discovery.py | 29 +++++++++++++++++------------ youtube_dl/extractor/dplay.py | 2 ++ youtube_dl/extractor/dw.py | 2 +- youtube_dl/extractor/generic.py | 11 ++++++++++- youtube_dl/extractor/laola1tv.py | 1 + youtube_dl/extractor/lrt.py | 1 + youtube_dl/extractor/matchtv.py | 1 + youtube_dl/extractor/mitele.py | 1 + youtube_dl/extractor/nrk.py | 1 + youtube_dl/extractor/restudy.py | 1 + youtube_dl/extractor/rte.py | 1 + youtube_dl/extractor/rtve.py | 1 + youtube_dl/extractor/rtvnh.py | 1 + youtube_dl/extractor/shahid.py | 1 + youtube_dl/extractor/sportbox.py | 1 + youtube_dl/extractor/telecinco.py | 1 + youtube_dl/extractor/tubitv.py | 1 + youtube_dl/extractor/videomore.py | 1 + youtube_dl/extractor/vier.py | 1 + youtube_dl/extractor/viidea.py | 1 + youtube_dl/extractor/ynet.py | 4 +++- 29 files changed, 56 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py index 122dc9099..c04949c21 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abc7news.py @@ -44,6 +44,7 @@ def _real_extract(self, url): 'contentURL', webpage, 'm3u8 url', fatal=True) formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 1805b7312..efa624de1 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -120,6 +120,7 @@ def _real_extract(self, url): bc_info = self._download_json(req, user) m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS') formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') + self._sort_formats(formats) return { 'id': info['id'], diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 03dad4636..986245bf0 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -94,6 +94,7 @@ def _real_extract(self, url): xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') formats = self._extract_smil_formats(smil_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index f23bac9a1..e6b7f3584 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -122,6 +122,7 @@ def _real_extract(self, url): for entry in f4m_formats: # URLs without the extra param induce an 404 error entry.update({'extra_param_to_segment_url': hdcore_sign}) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 242fba311..b2234549e 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -48,6 +48,7 @@ def _real_extract(self, url): raise ExtractorError('Unable to find stream URL') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b412fd030..40ddf175c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1021,8 +1021,6 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, 'height': int_or_none(media_el.attrib.get('height')), 'preference': preference, }) - self._sort_formats(formats) - return formats def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, @@ -1143,7 +1141,6 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, last_media = None formats.append(f) last_info = {} - self._sort_formats(formats) return formats @staticmethod @@ -1317,8 +1314,6 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para }) continue - self._sort_formats(formats) - return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): @@ -1536,7 +1531,6 @@ def extract_multisegment_info(element, ms_parent_info): existing_format.update(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - self._sort_formats(formats) return formats def _live_title(self, name): diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 36af67013..f5cefd966 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -57,6 +57,7 @@ def _real_extract(self, url): formats = self._extract_m3u8_formats( video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') + self._sort_formats(formats) thumbnails = [{ 'url': image['uri'], diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index 263532cc6..cdfeccacb 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -38,6 +38,7 @@ def _real_extract(self, url): token_el = f4m_info.find('token') manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' formats = self._extract_f4m_formats(manifest_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index fdce1429a..5f1275b39 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -63,18 +63,23 @@ def _real_extract(self, url): video_title = info.get('playlist_title') or info.get('video_title') - entries = [{ - 'id': compat_str(video_info['id']), - 'formats': self._extract_m3u8_formats( + entries = [] + + for idx, video_info in enumerate(info['playlist']): + formats = self._extract_m3u8_formats( video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', - note='Download m3u8 information for video %d' % (idx + 1)), - 'title': video_info['title'], - 'description': video_info.get('description'), - 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href') or video_info.get('url'), - 'thumbnail': video_info.get('thumbnailURL'), - 'alt_title': video_info.get('secondary_title'), - 'timestamp': parse_iso8601(video_info.get('publishedDate')), - } for idx, video_info in enumerate(info['playlist'])] + note='Download m3u8 information for video %d' % (idx + 1)) + self._sort_formats(formats) + entries.append({ + 'id': compat_str(video_info['id']), + 'formats': formats, + 'title': video_info['title'], + 'description': video_info.get('description'), + 'duration': parse_duration(video_info.get('video_length')), + 'webpage_url': video_info.get('href') or video_info.get('url'), + 'thumbnail': video_info.get('thumbnailURL'), + 'alt_title': video_info.get('secondary_title'), + 'timestamp': parse_iso8601(video_info.get('publishedDate')), + }) return self.playlist_result(entries, display_id, video_title) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1e7dcada6..66bbfc6ca 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -118,6 +118,8 @@ def extract_formats(protocol, manifest_url): if info.get(protocol): extract_formats(protocol, info[protocol]) + self._sort_formats(formats) + return { 'id': video_id, 'display_id': display_id, diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index b6c985547..ae7c571bd 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -39,13 +39,13 @@ def _real_extract(self, url): hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] - formats = [] if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( 'http://www.dw.com/smil/v-%s' % media_id, media_id, transform_source=lambda s: s.replace( 'rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')) + self._sort_formats(formats) else: formats = [{'url': hidden_inputs['file_name']}] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ea4009b41..f3de738f7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1310,6 +1310,7 @@ def _real_extract(self, url): 'vcodec': 'none' if m.group('type') == 'audio' else None }] info_dict['direct'] = True + self._sort_formats(formats) info_dict['formats'] = formats return info_dict @@ -1336,6 +1337,7 @@ def _real_extract(self, url): # Is it an M3U playlist? if first_bytes.startswith(b'#EXTM3U'): info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4') + self._sort_formats(info_dict['formats']) return info_dict # Maybe it's a direct link to a video? @@ -1360,15 +1362,19 @@ def _real_extract(self, url): if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): - return self._parse_smil(doc, url, video_id) + smil = self._parse_smil(doc, url, video_id) + self._sort_formats(smil['formats']) + return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, video_id, mpd_base_url=url.rpartition('/')[0]) + self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) + self._sort_formats(info_dict['formats']) return info_dict except compat_xml_parse_error: pass @@ -2053,6 +2059,9 @@ def filter_video(urls): else: entry_info_dict['url'] = video_url + if entry_info_dict.get('formats'): + self._sort_formats(entry_info_dict['formats']) + entries.append(entry_info_dict) if len(entries) == 1: diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d9dc067d2..d4fbafece 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -130,6 +130,7 @@ def _real_extract(self, url): formats = self._extract_f4m_formats( '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), video_id, f4m_id='hds') + self._sort_formats(formats) categories_str = _v('meta_sports') categories = categories_str.split(',') if categories_str else [] diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 863efd896..1072405b3 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -37,6 +37,7 @@ def _real_extract(self, url): r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', webpage, 'm3u8 url', group='url') formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index e33bfde3b..80a0d7013 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -47,6 +47,7 @@ def _real_extract(self, url): video_url = self._download_json(request, video_id)['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title('Матч ТВ - Прямой эфир'), diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 76ced7928..7b4581dc5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -67,6 +67,7 @@ def _real_extract(self, url): formats.extend(self._extract_f4m_formats( file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', display_id, f4m_id=loc)) + self._sort_formats(formats) title = self._search_regex( r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title') diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b21fbd4d..9df200822 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -63,6 +63,7 @@ def _real_extract(self, url): if determine_ext(media_url) == 'f4m': formats = self._extract_f4m_formats( media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') + self._sort_formats(formats) else: formats = [{ 'url': media_url, diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py index b17c2bfc0..fd50065d4 100644 --- a/youtube_dl/extractor/restudy.py +++ b/youtube_dl/extractor/restudy.py @@ -31,6 +31,7 @@ def _real_extract(self, url): formats = self._extract_smil_formats( 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, video_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 042bc8dab..9c89974e7 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -49,6 +49,7 @@ def _real_extract(self, url): # f4m_url = server + relative_url f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] f4m_formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 08cd1ae6c..79af47715 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -209,6 +209,7 @@ def _real_extract(self, url): png = self._download_webpage(png_url, video_id, 'Downloading url information') m3u8_url = _decrypt_url(png) formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 7c9d4b0cd..4896d09d6 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -38,6 +38,7 @@ def _real_extract(self, url): item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b4433a689..d95ea06be 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -77,6 +77,7 @@ def _real_extract(self, url): raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') + self._sort_formats(formats) video = self._download_json( '%s/%s/%s?%s' % ( diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 86d509ae5..4f0c66213 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -99,6 +99,7 @@ def _real_extract(self, url): webpage, 'hls file') formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + self._sort_formats(formats) title = self._search_regex( r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index d6b2560f8..4b4b740b4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -82,6 +82,7 @@ def _real_extract(self, url): ) formats = self._extract_m3u8_formats( token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') + self._sort_formats(formats) return { 'id': embed_data['videoId'], diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 306ee4e15..7af233cd6 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -69,6 +69,7 @@ def _real_extract(self, url): apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') m3u8_url = codecs.decode(apu, 'rot_13')[::-1] formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 0bd1e1eec..04e95c66e 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -111,6 +111,7 @@ def _real_extract(self, url): video_url = xpath_text(video, './/video_url', 'video url', fatal=True) formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') + self._sort_formats(formats) data = self._download_json( 'http://videomore.ru/video/tracks/%s.json' % video_id, diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index c76c20614..6645c6186 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -50,6 +50,7 @@ def _real_extract(self, url): playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) description = self._og_search_description(webpage, default=None) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 03b9f1353..a4f914d14 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -151,6 +151,7 @@ def extract_part(part_id): smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) smil = self._download_smil(smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id) + self._sort_formats(info['formats']) info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) if multipart: diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 2522551dc..0d943c343 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -41,10 +41,12 @@ def _real_extract(self, url): m = re.search(r'ynet - HOT -- (["\']+)(?P.+?)\1', title) if m: title = m.group('title') + formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'formats': self._extract_f4m_formats(f4m_url, video_id), + 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), }