From aa9369a2d84e4e05f4a8be16c85afc7ee2e902bd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 04:19:33 +0530 Subject: [PATCH] [cleanup] Minor improvements to error and debug messages --- yt_dlp/YoutubeDL.py | 52 +++++++++++++++++++++---------------- yt_dlp/__init__.py | 8 +++--- yt_dlp/extractor/generic.py | 39 +++++++++++++++++++++++++++- yt_dlp/extractor/youtube.py | 6 +++-- yt_dlp/utils.py | 32 ++++++++++++++++------- 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 92e430fda..2bf527770 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1318,9 +1318,9 @@ def wrapper(self, *args, **kwargs): self.report_error(msg) except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) - except ThrottledDownload: + except ThrottledDownload as e: self.to_stderr('\r') - self.report_warning('The download speed is below throttle limit. Re-extracting data') + self.report_warning(f'{e}; Re-extracting data') return wrapper(self, *args, **kwargs) except (DownloadCancelled, LazyList.IndexError): raise @@ -1499,7 +1499,7 @@ def __process_playlist(self, ie_result, download): self.to_screen('[download] Downloading playlist: %s' % playlist) if 'entries' not in ie_result: - raise EntryNotInPlaylist() + raise EntryNotInPlaylist('There are no entries') incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: def fill_missing_entries(entries, indexes): @@ -1561,7 +1561,7 @@ def get_entry(i): raise EntryNotInPlaylist() except (IndexError, EntryNotInPlaylist): if incomplete_entries: - raise EntryNotInPlaylist() + raise EntryNotInPlaylist(f'Entry {i} cannot be found') elif not playlistitems: break entries.append(entry) @@ -2935,8 +2935,25 @@ def ffmpeg_fixup(cndn, msg, cls): if max_downloads is not None and self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() + def __download_wrapper(self, func): + 
@functools.wraps(func) + def wrapper(*args, **kwargs): + try: + res = func(*args, **kwargs) + except UnavailableVideoError as e: + self.report_error(e) + except DownloadCancelled as e: + self.to_screen(f'[info] {e}') + raise + else: + if self.params.get('dump_single_json', False): + self.post_extract(res) + self.to_stdout(json.dumps(self.sanitize_info(res))) + return wrapper + def download(self, url_list): """Download a given list of URLs.""" + url_list = variadic(url_list) # Passing a single URL is a common mistake outtmpl = self.outtmpl_dict['default'] if (len(url_list) > 1 and outtmpl != '-' @@ -2945,19 +2962,8 @@ def download(self, url_list): raise SameFileError(outtmpl) for url in url_list: - try: - # It also downloads the videos - res = self.extract_info( - url, force_generic_extractor=self.params.get('force_generic_extractor', False)) - except UnavailableVideoError: - self.report_error('unable to download video') - except DownloadCancelled as e: - self.to_screen(f'[info] {e.msg}') - raise - else: - if self.params.get('dump_single_json', False): - self.post_extract(res) - self.to_stdout(json.dumps(self.sanitize_info(res))) + self.__download_wrapper(self.extract_info)( + url, force_generic_extractor=self.params.get('force_generic_extractor', False)) return self._download_retcode @@ -2968,11 +2974,12 @@ def download_with_info_file(self, info_filename): # FileInput doesn't have a read method, we can't call json.load info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) try: - self.process_ie_result(info, download=True) - except (DownloadError, EntryNotInPlaylist, ThrottledDownload): + self.__download_wrapper(self.process_ie_result)(info, download=True) + except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e: + self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is not None: - self.report_warning('The info failed to download, trying with "%s"' % webpage_url) + self.report_warning(f'The info 
failed to download: {e}; trying with URL {webpage_url}') return self.download([webpage_url]) else: raise @@ -3566,14 +3573,15 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None for t in thumbnails[::-1]: thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') - thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '') + thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) if not self.params.get('overwrites', True) and os.path.exists(thumb_filename): ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename - self.to_screen(f'[info] {thumb_display_id.title()} is already present') + self.to_screen('[info] %s is already present' % ( + thumb_display_id if multiple else f'{label} thumbnail').capitalize()) else: self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3020b6e95..d72e08b35 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -795,15 +795,15 @@ def main(argv=None): _real_main(argv) except DownloadError: sys.exit(1) - except SameFileError: - sys.exit('ERROR: fixed output name but more than one file to download') + except SameFileError as e: + sys.exit(f'ERROR: {e}') except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') - except BrokenPipeError: + except BrokenPipeError as e: # https://docs.python.org/3/library/signal.html#note-on-sigpipe devnull = os.open(os.devnull, os.O_WRONLY) os.dup2(devnull, sys.stdout.fileno()) - sys.exit(r'\nERROR: {err}') + sys.exit(f'\nERROR: {e}') __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 26f9497f1..36767b41f 100644 --- a/yt_dlp/extractor/generic.py +++ 
b/yt_dlp/extractor/generic.py @@ -2337,6 +2337,9 @@ def report_following_redirect(self, new_url): """Report information extraction.""" self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) + def report_detected(self, name): + self._downloader.write_debug(f'Identified a {name}') + def _extract_rss(self, url, video_id, doc): playlist_title = doc.find('./channel/title').text playlist_desc_el = doc.find('./channel/description') @@ -2552,6 +2555,7 @@ def _real_extract(self, url): content_type = head_response.headers.get('Content-Type', '').lower() m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) if m: + self.report_detected('direct video link') format_id = compat_str(m.group('format_id')) subtitles = {} if format_id.endswith('mpegurl'): @@ -2592,6 +2596,7 @@ def _real_extract(self, url): # Is it an M3U playlist? if first_bytes.startswith(b'#EXTM3U'): + self.report_detected('M3U playlist') info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') self._sort_formats(info_dict['formats']) return info_dict @@ -2622,16 +2627,20 @@ def _real_extract(self, url): except compat_xml_parse_error: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': + self.report_detected('RSS feed') return self._extract_rss(url, video_id, doc) elif doc.tag == 'SmoothStreamingMedia': info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url) + self.report_detected('ISM manifest') self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): smil = self._parse_smil(doc, url, video_id) + self.report_detected('SMIL file') self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': + self.report_detected('XSPF playlist') return self.playlist_result( self._parse_xspf( doc, video_id, xspf_url=url, @@ -2642,10 +2651,12 @@ def 
_real_extract(self, url): doc, mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_url=url) + self.report_detected('DASH manifest') self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) + self.report_detected('F4M manifest') self._sort_formats(info_dict['formats']) return info_dict except compat_xml_parse_error: @@ -2654,6 +2665,7 @@ def _real_extract(self, url): # Is it a Camtasia project? camtasia_res = self._extract_camtasia(url, video_id, webpage) if camtasia_res is not None: + self.report_detected('Camtasia video') return camtasia_res # Sometimes embedded video player is hidden behind percent encoding @@ -2704,6 +2716,8 @@ def _real_extract(self, url): 'age_limit': age_limit, }) + self._downloader.write_debug('Looking for video embeds') + # Look for Brightcove Legacy Studio embeds bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) if bc_urls: @@ -3497,6 +3511,7 @@ def _real_extract(self, url): # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: + self.report_detected('HTML5 media') if len(entries) == 1: entries[0].update({ 'id': video_id, @@ -3516,6 +3531,7 @@ def _real_extract(self, url): webpage, video_id, transform_source=js_to_json) if jwplayer_data: if isinstance(jwplayer_data.get('playlist'), str): + self.report_detected('JW Player playlist') return { **info_dict, '_type': 'url', @@ -3525,6 +3541,7 @@ def _real_extract(self, url): try: info = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=url) + self.report_detected('JW Player data') return merge_dicts(info, info_dict) except ExtractorError: # See https://github.com/ytdl-org/youtube-dl/pull/16735 @@ -3574,6 +3591,7 @@ def _real_extract(self, url): }, }) if formats or subtitles: + self.report_detected('video.js embed') self._sort_formats(formats) 
info_dict['formats'] = formats info_dict['subtitles'] = subtitles @@ -3582,6 +3600,7 @@ def _real_extract(self, url): # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url'): + self.report_detected('JSON LD') return merge_dicts(json_ld, info_dict) def check_video(vurl): @@ -3598,7 +3617,9 @@ def filter_video(urls): # Start with something easy: JW Player in SWFObject found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)) - if not found: + if found: + self.report_detected('JW Player in SFWObject') + else: # Look for gorilla-vid style embedding found = filter_video(re.findall(r'''(?sx) (?: @@ -3608,10 +3629,13 @@ def filter_video(urls): ) .*? ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) + if found: + self.report_detected('JW Player embed') if not found: # Look for generic KVS player found = re.search(r'', webpage) @@ -3657,10 +3681,14 @@ def filter_video(urls): if not found: # Broaden the search a little bit found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) + if found: + self.report_detected('video file') if not found: # Broaden the findall a little bit: JWPlayer JS loader found = filter_video(re.findall( r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) + if found: + self.report_detected('JW Player JS loader') if not found: # Flow player found = filter_video(re.findall(r'''(?xs) @@ -3669,10 +3697,14 @@ def filter_video(urls): \s*\{[^}]+? 
["']?clip["']?\s*:\s*\{\s* ["']?url["']?\s*:\s*["']([^"']+)["'] ''', webpage)) + if found: + self.report_detected('Flow Player') if not found: # Cinerama player found = re.findall( r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage) + if found: + self.report_detected('Cinerama player') if not found: # Try to find twitter cards info # twitter:player:stream should be checked before twitter:player since @@ -3680,6 +3712,8 @@ def filter_video(urls): # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser) found = filter_video(re.findall( r' {self._player_cache[sig_id]}') return self._player_cache[sig_id] except Exception as e: - raise ExtractorError(traceback.format_exc(), cause=e) + raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id) def _extract_n_function_name(self, jscode): return self._search_regex( @@ -2496,7 +2496,9 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live): fmt_url = update_url_query(fmt_url, { 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) except ExtractorError as e: - self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True) + self.report_warning( + f'nsig extraction failed: You may experience throttling for some formats\n' + f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) throttled = True if itag: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 875ab5e72..cd453f367 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2459,7 +2459,14 @@ def bug_reports_message(before=';'): class YoutubeDLError(Exception): """Base exception for YoutubeDL errors.""" - pass + msg = None + + def __init__(self, msg=None): + if msg is not None: + self.msg = msg + elif self.msg is None: + self.msg = type(self).__name__ + super().__init__(self.msg) network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error] @@ -2544,7 +2551,7 @@ class 
EntryNotInPlaylist(YoutubeDLError): This exception will be thrown by YoutubeDL when a requested entry is not found in the playlist info_dict """ - pass + msg = 'Entry not found in info' class SameFileError(YoutubeDLError): @@ -2553,7 +2560,12 @@ class SameFileError(YoutubeDLError): This exception will be thrown by FileDownloader objects if they detect multiple files would have to be downloaded to the same file on disk. """ - pass + msg = 'Fixed output name but more than one file to download' + + def __init__(self, filename=None): + if filename is not None: + self.msg += f': {filename}' + super().__init__(self.msg) class PostProcessingError(YoutubeDLError): @@ -2572,11 +2584,6 @@ class DownloadCancelled(YoutubeDLError): """ Exception raised when the download queue should be interrupted """ msg = 'The download was cancelled' - def __init__(self, msg=None): - if msg is not None: - self.msg = msg - YoutubeDLError.__init__(self, self.msg) - class ExistingVideoReached(DownloadCancelled): """ --break-on-existing triggered """ @@ -2595,7 +2602,7 @@ class MaxDownloadsReached(DownloadCancelled): class ThrottledDownload(YoutubeDLError): """ Download speed below --throttled-rate. """ - pass + msg = 'The download speed is below throttle limit' class UnavailableVideoError(YoutubeDLError): @@ -2604,7 +2611,12 @@ class UnavailableVideoError(YoutubeDLError): This exception will be thrown when a video is requested in a format that is not available for that video. """ - pass + msg = 'Unable to download video' + + def __init__(self, err=None): + if err is not None: + self.msg += f': {err}' + super().__init__(self.msg) class ContentTooShortError(YoutubeDLError):