From 54f37eeabda3b38098231e8bd7feccfce27380c0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 18 Feb 2021 23:52:28 +0530 Subject: [PATCH] [formatsort] Remove unnecessary `field_preference` from extractors These were written with the old format sorting in mind and are no longer needed --- youtube_dlc/extractor/alura.py | 2 +- youtube_dlc/extractor/aol.py | 2 +- youtube_dlc/extractor/aparat.py | 3 +-- youtube_dlc/extractor/arcpublishing.py | 3 +-- youtube_dlc/extractor/crunchyroll.py | 2 +- youtube_dlc/extractor/leeco.py | 2 +- youtube_dlc/extractor/linkedin.py | 5 ++++- youtube_dlc/extractor/massengeschmacktv.py | 2 +- youtube_dlc/extractor/nytimes.py | 2 +- youtube_dlc/extractor/pinterest.py | 3 +-- youtube_dlc/extractor/reddit.py | 2 +- youtube_dlc/extractor/sendtonews.py | 4 +++- youtube_dlc/extractor/spankbang.py | 2 +- youtube_dlc/extractor/spankwire.py | 2 +- youtube_dlc/extractor/threeqsdn.py | 5 ++++- youtube_dlc/extractor/udemy.py | 2 +- youtube_dlc/extractor/umg.py | 2 +- youtube_dlc/extractor/viewlift.py | 2 +- youtube_dlc/extractor/vimeo.py | 3 ++- youtube_dlc/extractor/xhamster.py | 2 +- 20 files changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dlc/extractor/alura.py b/youtube_dlc/extractor/alura.py index 36b4d95b3..2681bfc18 100644 --- a/youtube_dlc/extractor/alura.py +++ b/youtube_dlc/extractor/alura.py @@ -67,7 +67,7 @@ def _real_extract(self, url): f['height'] = int('720' if m.group('res') == 'hd' else '480') formats.extend(video_format) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dlc/extractor/aol.py b/youtube_dlc/extractor/aol.py index f6ecb8438..133b5e254 100644 --- a/youtube_dlc/extractor/aol.py +++ b/youtube_dlc/extractor/aol.py @@ -125,7 +125,7 @@ def _real_extract(self, url): 'height': int_or_none(qs.get('h', [None])[0]), }) formats.append(f) - self._sort_formats(formats, ('width', 'height', 'tbr', 
'format_id')) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dlc/extractor/aparat.py b/youtube_dlc/extractor/aparat.py index a9527e785..da06a3cac 100644 --- a/youtube_dlc/extractor/aparat.py +++ b/youtube_dlc/extractor/aparat.py @@ -72,8 +72,7 @@ def _real_extract(self, url): r'(\d+)[pP]', label or '', 'height', default=None)), }) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) + self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, default={}) diff --git a/youtube_dlc/extractor/arcpublishing.py b/youtube_dlc/extractor/arcpublishing.py index ca6a6c4d8..48b83ce58 100644 --- a/youtube_dlc/extractor/arcpublishing.py +++ b/youtube_dlc/extractor/arcpublishing.py @@ -152,8 +152,7 @@ def _real_extract(self, url): 'url': s_url, 'preference': -1, }) - self._sort_formats( - formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id')) + self._sort_formats(formats) subtitles = {} for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []): diff --git a/youtube_dlc/extractor/crunchyroll.py b/youtube_dlc/extractor/crunchyroll.py index bc2d1fa8b..47892544f 100644 --- a/youtube_dlc/extractor/crunchyroll.py +++ b/youtube_dlc/extractor/crunchyroll.py @@ -571,7 +571,7 @@ def _real_extract(self, url): 'ext': 'flv', }) formats.append(format_info) - self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps')) + self._sort_formats(formats) metadata = self._call_rpc_api( 'VideoPlayer_GetMediaMetadata', video_id, diff --git a/youtube_dlc/extractor/leeco.py b/youtube_dlc/extractor/leeco.py index 7dc0ad794..d5e11423c 100644 --- a/youtube_dlc/extractor/leeco.py +++ b/youtube_dlc/extractor/leeco.py @@ -185,7 +185,7 @@ def get_flash_urls(media_url, format_id): f['height'] = int_or_none(format_id[:-1]) formats.append(f) - self._sort_formats(formats, ('height', 'quality', 'format_id')) + self._sort_formats(formats, 
('res', 'quality')) publish_time = parse_iso8601(self._html_search_regex( r'发布时间 ([^<>]+) ', page, 'publish time', default=None), diff --git a/youtube_dlc/extractor/linkedin.py b/youtube_dlc/extractor/linkedin.py index 26fc703d1..e1dca4899 100644 --- a/youtube_dlc/extractor/linkedin.py +++ b/youtube_dlc/extractor/linkedin.py @@ -124,7 +124,10 @@ def _real_extract(self, url): streaming_url, video_slug, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) + # It seems like this would be correctly handled by default + # However, unless someone can confirm this, the old + # behaviour is being kept as-is + self._sort_formats(formats, ('res', 'source_preference')) return { 'id': self._get_video_id(video_data, course_slug, video_slug), diff --git a/youtube_dlc/extractor/massengeschmacktv.py b/youtube_dlc/extractor/massengeschmacktv.py index cfcc6b224..b381d31b4 100644 --- a/youtube_dlc/extractor/massengeschmacktv.py +++ b/youtube_dlc/extractor/massengeschmacktv.py @@ -67,7 +67,7 @@ def _real_extract(self, url): 'vcodec': 'none' if format_id.startswith('Audio') else None, }) - self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr')) + self._sort_formats(formats) return { 'id': episode, diff --git a/youtube_dlc/extractor/nytimes.py b/youtube_dlc/extractor/nytimes.py index 976b1c694..1f03a9462 100644 --- a/youtube_dlc/extractor/nytimes.py +++ b/youtube_dlc/extractor/nytimes.py @@ -72,7 +72,7 @@ def get_file_size(file_size): 'tbr': int_or_none(video.get('bitrate'), 1000) or None, 'ext': ext, }) - self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id')) + self._sort_formats(formats) thumbnails = [] for image in video_data.get('images', []): diff --git a/youtube_dlc/extractor/pinterest.py b/youtube_dlc/extractor/pinterest.py index b249c9eda..15c11a755 100644 --- a/youtube_dlc/extractor/pinterest.py +++ b/youtube_dlc/extractor/pinterest.py @@ -54,8 +54,7 
@@ def _extract_video(self, data, extract_formats=True): 'height': int_or_none(format_dict.get('height')), 'duration': duration, }) - self._sort_formats( - formats, field_preference=('height', 'width', 'tbr', 'format_id')) + self._sort_formats(formats) description = data.get('description') or data.get('description_html') or data.get('seo_description') timestamp = unified_timestamp(data.get('created_at')) diff --git a/youtube_dlc/extractor/reddit.py b/youtube_dlc/extractor/reddit.py index 77f66c966..222fa0172 100644 --- a/youtube_dlc/extractor/reddit.py +++ b/youtube_dlc/extractor/reddit.py @@ -40,7 +40,7 @@ def _real_extract(self, url): 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, mpd_id='dash', fatal=False)) - self._sort_formats(formats, ('height', 'width')) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dlc/extractor/sendtonews.py b/youtube_dlc/extractor/sendtonews.py index 9d9652949..bc38a0f1e 100644 --- a/youtube_dlc/extractor/sendtonews.py +++ b/youtube_dlc/extractor/sendtonews.py @@ -80,7 +80,9 @@ def _real_extract(self, url): 'format_id': '%s-%d' % (determine_protocol(f), tbr), 'tbr': tbr, }) - self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id')) + # 'tbr' was explicitly set to be preferred over 'height' originally, + # so this is being kept unless someone can confirm this is unnecessary + self._sort_formats(info_dict['formats'], ('tbr', 'res')) thumbnails = [] if video.get('thumbnailUrl'): diff --git a/youtube_dlc/extractor/spankbang.py b/youtube_dlc/extractor/spankbang.py index 37cb8c839..f14bd6d71 100644 --- a/youtube_dlc/extractor/spankbang.py +++ b/youtube_dlc/extractor/spankbang.py @@ -129,7 +129,7 @@ def extract_format(format_id, format_url): format_url = format_url[0] extract_format(format_id, format_url) - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) + self._sort_formats(formats) info = self._search_json_ld(webpage, 
video_id, default={}) diff --git a/youtube_dlc/extractor/spankwire.py b/youtube_dlc/extractor/spankwire.py index 35ab9ec37..e97c1d23e 100644 --- a/youtube_dlc/extractor/spankwire.py +++ b/youtube_dlc/extractor/spankwire.py @@ -108,7 +108,7 @@ def _real_extract(self, url): formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id')) + self._sort_formats(formats) view_count = str_to_int(video.get('viewed')) diff --git a/youtube_dlc/extractor/threeqsdn.py b/youtube_dlc/extractor/threeqsdn.py index f6d37bb9e..71aa357df 100644 --- a/youtube_dlc/extractor/threeqsdn.py +++ b/youtube_dlc/extractor/threeqsdn.py @@ -138,7 +138,10 @@ def _real_extract(self, url): f['preference'] = -40 elif f.get('vcodec') == 'none': f['preference'] = -50 - self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id')) + # It seems like this would be correctly handled by default + # However, unless someone can confirm this, the old + # behaviour is being kept as-is + self._sort_formats(formats, ('res', 'source_preference')) subtitles = {} for subtitle in (config.get('subtitles') or []): diff --git a/youtube_dlc/extractor/udemy.py b/youtube_dlc/extractor/udemy.py index 60e364d30..5b81aa365 100644 --- a/youtube_dlc/extractor/udemy.py +++ b/youtube_dlc/extractor/udemy.py @@ -405,7 +405,7 @@ def extract_subtitles(track_list): if f.get('url'): formats.append(f) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dlc/extractor/umg.py b/youtube_dlc/extractor/umg.py index d815cd9a6..8c84f2009 100644 --- a/youtube_dlc/extractor/umg.py +++ b/youtube_dlc/extractor/umg.py @@ -91,7 +91,7 @@ def add_m3u8_format(format_id): if not formats: for format_id in (867, 836, 940): add_m3u8_format(format_id) - 
self._sort_formats(formats, ('width', 'height', 'filesize', 'tbr')) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dlc/extractor/viewlift.py b/youtube_dlc/extractor/viewlift.py index d6b92b1c8..55c2b95c2 100644 --- a/youtube_dlc/extractor/viewlift.py +++ b/youtube_dlc/extractor/viewlift.py @@ -134,7 +134,7 @@ def _real_extract(self, url): if hls_url: formats.extend(self._extract_m3u8_formats( hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats, ('height', 'tbr', 'format_id')) + self._sort_formats(formats) info = { 'id': film_id, diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 299d99f6f..600426197 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -116,7 +116,8 @@ def _set_vimeo_cookie(self, name, value): def _vimeo_sort_formats(self, formats): # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) + # But since yt-dlp prefers 'res,fps' anyway, 'field_preference' is not needed + self._sort_formats(formats) def _parse_config(self, config, video_id): video_data = config['video'] diff --git a/youtube_dlc/extractor/xhamster.py b/youtube_dlc/extractor/xhamster.py index f73b9778f..1c9398986 100644 --- a/youtube_dlc/extractor/xhamster.py +++ b/youtube_dlc/extractor/xhamster.py @@ -231,7 +231,7 @@ def get_height(s): 'Referer': standard_url, }, }) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) + self._sort_formats(formats) categories_list = video.get('categories') if isinstance(categories_list, list):