From 0ad92dfb180e4b29f4fec91413474588b8c2b005 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 18 Feb 2022 19:41:37 +0530 Subject: [PATCH] [youtube] De-prioritize potentially damaged formats Closes #2823 --- yt_dlp/extractor/youtube.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d10dc20e4..c03637f5f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[]) + approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None for fmt in streaming_formats: if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): @@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itags[itag] = 'https' stream_ids.append(stream_id) - tbr = float_or_none( - fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) + tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) language_preference = ( 10 if audio_track.get('audioIsDefault') and 10 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 else -1) + # Some formats may have much smaller duration than others (possibly damaged during encoding) + # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 + is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '%s%s' % (audio_track.get('displayName') or '', ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), - throttled and 'THROTTLED', delim=', '), + throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), 'source_preference': -10 if throttled else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, @@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'desc' if language_preference < -1 else ''), 'language_preference': language_preference, + 'preference': -10 if is_damaged else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')