From aa9369a2d84e4e05f4a8be16c85afc7ee2e902bd Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 10 Nov 2021 04:19:33 +0530
Subject: [PATCH] [cleanup] Minor improvements to error and debug messages

---
 yt_dlp/YoutubeDL.py         | 52 +++++++++++++++++++++----------------
 yt_dlp/__init__.py          |  8 +++---
 yt_dlp/extractor/generic.py | 39 +++++++++++++++++++++++++++-
 yt_dlp/extractor/youtube.py |  6 +++--
 yt_dlp/utils.py             | 32 ++++++++++++++++-------
 5 files changed, 98 insertions(+), 39 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 92e430fda..2bf527770 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1318,9 +1318,9 @@ def wrapper(self, *args, **kwargs):
                 self.report_error(msg)
             except ExtractorError as e:  # An error we somewhat expected
                 self.report_error(compat_str(e), e.format_traceback())
-            except ThrottledDownload:
+            except ThrottledDownload as e:
                 self.to_stderr('\r')
-                self.report_warning('The download speed is below throttle limit. Re-extracting data')
+                self.report_warning(f'{e}; Re-extracting data')
                 return wrapper(self, *args, **kwargs)
             except (DownloadCancelled, LazyList.IndexError):
                 raise
@@ -1499,7 +1499,7 @@ def __process_playlist(self, ie_result, download):
         self.to_screen('[download] Downloading playlist: %s' % playlist)
 
         if 'entries' not in ie_result:
-            raise EntryNotInPlaylist()
+            raise EntryNotInPlaylist('There are no entries')
         incomplete_entries = bool(ie_result.get('requested_entries'))
         if incomplete_entries:
             def fill_missing_entries(entries, indexes):
@@ -1561,7 +1561,7 @@ def get_entry(i):
                     raise EntryNotInPlaylist()
             except (IndexError, EntryNotInPlaylist):
                 if incomplete_entries:
-                    raise EntryNotInPlaylist()
+                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                 elif not playlistitems:
                     break
             entries.append(entry)
@@ -2935,8 +2935,25 @@ def ffmpeg_fixup(cndn, msg, cls):
         if max_downloads is not None and self._num_downloads >= int(max_downloads):
             raise MaxDownloadsReached()
 
+    def __download_wrapper(self, func):  # returns `func` wrapped with the error handling used by download() and download_with_info_file()
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):  # NOTE: has no return statement, so func's result is not passed back to the caller
+            try:
+                res = func(*args, **kwargs)
+            except UnavailableVideoError as e:
+                self.report_error(e)  # reported here and not re-raised
+            except DownloadCancelled as e:
+                self.to_screen(f'[info] {e}')
+                raise  # after printing the notice, the cancellation still propagates
+            else:  # reached only when func() returned without raising
+                if self.params.get('dump_single_json', False):
+                    self.post_extract(res)
+                    self.to_stdout(json.dumps(self.sanitize_info(res)))
+        return wrapper
+
     def download(self, url_list):
         """Download a given list of URLs."""
+        url_list = variadic(url_list)  # Passing a single URL is a common mistake
         outtmpl = self.outtmpl_dict['default']
         if (len(url_list) > 1
                 and outtmpl != '-'
@@ -2945,19 +2962,8 @@ def download(self, url_list):
             raise SameFileError(outtmpl)
 
         for url in url_list:
-            try:
-                # It also downloads the videos
-                res = self.extract_info(
-                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
-            except UnavailableVideoError:
-                self.report_error('unable to download video')
-            except DownloadCancelled as e:
-                self.to_screen(f'[info] {e.msg}')
-                raise
-            else:
-                if self.params.get('dump_single_json', False):
-                    self.post_extract(res)
-                    self.to_stdout(json.dumps(self.sanitize_info(res)))
+            self.__download_wrapper(self.extract_info)(
+                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
 
         return self._download_retcode
 
@@ -2968,11 +2974,12 @@ def download_with_info_file(self, info_filename):
             # FileInput doesn't have a read method, we can't call json.load
             info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
         try:
-            self.process_ie_result(info, download=True)
-        except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
+            self.__download_wrapper(self.process_ie_result)(info, download=True)
+        except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
+            self.to_stderr('\r')
             webpage_url = info.get('webpage_url')
             if webpage_url is not None:
-                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                 return self.download([webpage_url])
             else:
                 raise
@@ -3566,14 +3573,15 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
 
         for t in thumbnails[::-1]:
             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
-            thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
+            thumb_display_id = f'{label} thumbnail {t["id"]}'
             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
             thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
 
             if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
                 ret.append((thumb_filename, thumb_filename_final))
                 t['filepath'] = thumb_filename
-                self.to_screen(f'[info] {thumb_display_id.title()} is already present')
+                self.to_screen('[info] %s is already present' % (
+                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
             else:
                 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                 try:
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 3020b6e95..d72e08b35 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -795,15 +795,15 @@ def main(argv=None):
         _real_main(argv)
     except DownloadError:
         sys.exit(1)
-    except SameFileError:
-        sys.exit('ERROR: fixed output name but more than one file to download')
+    except SameFileError as e:
+        sys.exit(f'ERROR: {e}')
     except KeyboardInterrupt:
         sys.exit('\nERROR: Interrupted by user')
-    except BrokenPipeError:
+    except BrokenPipeError as e:
         # https://docs.python.org/3/library/signal.html#note-on-sigpipe
         devnull = os.open(os.devnull, os.O_WRONLY)
         os.dup2(devnull, sys.stdout.fileno())
-        sys.exit(r'\nERROR: {err}')
+        sys.exit(f'\nERROR: {e}')
 
 
 __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 26f9497f1..36767b41f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2337,6 +2337,9 @@ def report_following_redirect(self, new_url):
         """Report information extraction."""
         self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
 
+    def report_detected(self, name):  # name: short human-readable label of what was recognised, e.g. 'RSS feed'
+        self._downloader.write_debug(f'Identified a {name}')
+
     def _extract_rss(self, url, video_id, doc):
         playlist_title = doc.find('./channel/title').text
         playlist_desc_el = doc.find('./channel/description')
@@ -2552,6 +2555,7 @@ def _real_extract(self, url):
         content_type = head_response.headers.get('Content-Type', '').lower()
         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
+            self.report_detected('direct video link')
             format_id = compat_str(m.group('format_id'))
             subtitles = {}
             if format_id.endswith('mpegurl'):
@@ -2592,6 +2596,7 @@ def _real_extract(self, url):
 
         # Is it an M3U playlist?
         if first_bytes.startswith(b'#EXTM3U'):
+            self.report_detected('M3U playlist')
             info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
             self._sort_formats(info_dict['formats'])
             return info_dict
@@ -2622,16 +2627,20 @@ def _real_extract(self, url):
             except compat_xml_parse_error:
                 doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
+                self.report_detected('RSS feed')
                 return self._extract_rss(url, video_id, doc)
             elif doc.tag == 'SmoothStreamingMedia':
                 info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
+                self.report_detected('ISM manifest')
                 self._sort_formats(info_dict['formats'])
                 return info_dict
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
                 smil = self._parse_smil(doc, url, video_id)
+                self.report_detected('SMIL file')
                 self._sort_formats(smil['formats'])
                 return smil
             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+                self.report_detected('XSPF playlist')
                 return self.playlist_result(
                     self._parse_xspf(
                         doc, video_id, xspf_url=url,
@@ -2642,10 +2651,12 @@ def _real_extract(self, url):
                     doc,
                     mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)
+                self.report_detected('DASH manifest')
                 self._sort_formats(info_dict['formats'])
                 return info_dict
             elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
                 info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                self.report_detected('F4M manifest')
                 self._sort_formats(info_dict['formats'])
                 return info_dict
         except compat_xml_parse_error:
@@ -2654,6 +2665,7 @@ def _real_extract(self, url):
         # Is it a Camtasia project?
         camtasia_res = self._extract_camtasia(url, video_id, webpage)
         if camtasia_res is not None:
+            self.report_detected('Camtasia video')
             return camtasia_res
 
         # Sometimes embedded video player is hidden behind percent encoding
@@ -2704,6 +2716,8 @@ def _real_extract(self, url):
             'age_limit': age_limit,
         })
 
+        self._downloader.write_debug('Looking for video embeds')
+
         # Look for Brightcove Legacy Studio embeds
         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
@@ -3497,6 +3511,7 @@ def _real_extract(self, url):
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
+            self.report_detected('HTML5 media')
             if len(entries) == 1:
                 entries[0].update({
                     'id': video_id,
@@ -3516,6 +3531,7 @@ def _real_extract(self, url):
             webpage, video_id, transform_source=js_to_json)
         if jwplayer_data:
             if isinstance(jwplayer_data.get('playlist'), str):
+                self.report_detected('JW Player playlist')
                 return {
                     **info_dict,
                     '_type': 'url',
@@ -3525,6 +3541,7 @@ def _real_extract(self, url):
             try:
                 info = self._parse_jwplayer_data(
                     jwplayer_data, video_id, require_title=False, base_url=url)
+                self.report_detected('JW Player data')
                 return merge_dicts(info, info_dict)
             except ExtractorError:
                 # See https://github.com/ytdl-org/youtube-dl/pull/16735
@@ -3574,6 +3591,7 @@ def _real_extract(self, url):
                         },
                     })
             if formats or subtitles:
+                self.report_detected('video.js embed')
                 self._sort_formats(formats)
                 info_dict['formats'] = formats
                 info_dict['subtitles'] = subtitles
@@ -3582,6 +3600,7 @@ def _real_extract(self, url):
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(webpage, video_id, default={})
         if json_ld.get('url'):
+            self.report_detected('JSON LD')
             return merge_dicts(json_ld, info_dict)
 
         def check_video(vurl):
@@ -3598,7 +3617,9 @@ def filter_video(urls):
 
         # Start with something easy: JW Player in SWFObject
         found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
-        if not found:
+        if found:
+            self.report_detected('JW Player in SWFObject')
+        else:
             # Look for gorilla-vid style embedding
             found = filter_video(re.findall(r'''(?sx)
                 (?:
@@ -3608,10 +3629,13 @@ def filter_video(urls):
                 )
                 .*?
                 ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+            if found:
+                self.report_detected('JW Player embed')
         if not found:
             # Look for generic KVS player
             found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
             if found:
+                self.report_detected('KVS Player')
                 if found.group('maj_ver') not in ['4', '5']:
                     self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
                 flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
@@ -3657,10 +3681,14 @@ def filter_video(urls):
         if not found:
             # Broaden the search a little bit
             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
+            if found:
+                self.report_detected('video file')
         if not found:
             # Broaden the findall a little bit: JWPlayer JS loader
             found = filter_video(re.findall(
                 r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+            if found:
+                self.report_detected('JW Player JS loader')
         if not found:
             # Flow player
             found = filter_video(re.findall(r'''(?xs)
@@ -3669,10 +3697,14 @@ def filter_video(urls):
                     \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
                         ["']?url["']?\s*:\s*["']([^"']+)["']
             ''', webpage))
+            if found:
+                self.report_detected('Flow Player')
         if not found:
             # Cinerama player
             found = re.findall(
                 r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
+            if found:
+                self.report_detected('Cinerama player')
         if not found:
             # Try to find twitter cards info
             # twitter:player:stream should be checked before twitter:player since
@@ -3680,6 +3712,8 @@ def filter_video(urls):
             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
             found = filter_video(re.findall(
                 r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
+            if found:
+                self.report_detected('Twitter card')
         if not found:
             # We look for Open Graph info:
             # We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
@@ -3687,6 +3721,8 @@ def filter_video(urls):
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             if m_video_type is not None:
                 found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
+                if found:
+                    self.report_detected('Open Graph video info')
         if not found:
             REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
@@ -3718,6 +3754,7 @@ def filter_video(urls):
             # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
             embed_url = self._html_search_meta('twitter:player', webpage, default=None)
             if embed_url and embed_url != url:
+                self.report_detected('twitter:player iframe')
                 return self.url_result(embed_url)
 
         if not found:
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d83192663..25554c862 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1864,7 +1864,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
             return self._player_cache[sig_id]
         except Exception as e:
-            raise ExtractorError(traceback.format_exc(), cause=e)
+            raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
 
     def _extract_n_function_name(self, jscode):
         return self._search_regex(
@@ -2496,7 +2496,9 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live):
                     fmt_url = update_url_query(fmt_url, {
                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
                 except ExtractorError as e:
-                    self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
+                    self.report_warning(
+                        f'nsig extraction failed: You may experience throttling for some formats\n'
+                        f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                     throttled = True
 
             if itag:
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 875ab5e72..cd453f367 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2459,7 +2459,14 @@ def bug_reports_message(before=';'):
 
 class YoutubeDLError(Exception):
     """Base exception for YoutubeDL errors."""
-    pass
+    msg = None  # class-level default message; subclasses override it with their own text
+
+    def __init__(self, msg=None):
+        if msg is not None:
+            self.msg = msg  # an explicit message takes precedence over the class default
+        elif self.msg is None:
+            self.msg = type(self).__name__  # no message and no default: fall back to the class name
+        super().__init__(self.msg)
 
 
 network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
@@ -2544,7 +2551,7 @@ class EntryNotInPlaylist(YoutubeDLError):
     This exception will be thrown by YoutubeDL when a requested entry
     is not found in the playlist info_dict
     """
-    pass
+    msg = 'Entry not found in info'
 
 
 class SameFileError(YoutubeDLError):
@@ -2553,7 +2560,12 @@ class SameFileError(YoutubeDLError):
     This exception will be thrown by FileDownloader objects if they detect
     multiple files would have to be downloaded to the same file on disk.
     """
-    pass
+    msg = 'Fixed output name but more than one file to download'
+
+    def __init__(self, filename=None):  # filename (optional) is appended to the default message
+        if filename is not None:
+            self.msg += f': {filename}'  # rebinds msg on this instance only; the class-level default is unchanged
+        super().__init__(self.msg)
 
 
 class PostProcessingError(YoutubeDLError):
@@ -2572,11 +2584,6 @@ class DownloadCancelled(YoutubeDLError):
     """ Exception raised when the download queue should be interrupted """
     msg = 'The download was cancelled'
 
-    def __init__(self, msg=None):
-        if msg is not None:
-            self.msg = msg
-        YoutubeDLError.__init__(self, self.msg)
-
 
 class ExistingVideoReached(DownloadCancelled):
     """ --break-on-existing triggered """
@@ -2595,7 +2602,7 @@ class MaxDownloadsReached(DownloadCancelled):
 
 class ThrottledDownload(YoutubeDLError):
     """ Download speed below --throttled-rate. """
-    pass
+    msg = 'The download speed is below throttle limit'  # default message used when raised without one
 
 
 class UnavailableVideoError(YoutubeDLError):
@@ -2604,7 +2611,12 @@ class UnavailableVideoError(YoutubeDLError):
     This exception will be thrown when a video is requested
     in a format that is not available for that video.
     """
-    pass
+    msg = 'Unable to download video'
+
+    def __init__(self, err=None):  # err (optional) is appended to the default message
+        if err is not None:
+            self.msg += f': {err}'  # rebinds msg on this instance only; the class-level default is unchanged
+        super().__init__(self.msg)
 
 
 class ContentTooShortError(YoutubeDLError):