[fragment] Read downloaded fragments only when needed (#3069)

Authored by: Lesmiscore
2024-12-29 01:51:06 +00:00 · 2022-03-15 12:27:41 +09:00 · 2022-03-15 12:27:41 +09:00 · d71fd41249
commit d71fd41249
parent d69e55c1d8
4 changed files with 29 additions and 30 deletions
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -133,19 +133,19 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
-            return False, None
+            return False
        if fragment_info_dict.get('filetime'):
            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
        ctx['fragment_filename_sanitized'] = fragment_filename
-        try:
-            return True, self._read_fragment(ctx)
-        except FileNotFoundError:
-            if not info_dict.get('is_live'):
-                raise
-            return False, None
+        return True

    def _read_fragment(self, ctx):
-        down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+        try:
+            down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+        except FileNotFoundError:
+            if ctx.get('live'):
+                return None
+            raise
        ctx['fragment_filename_sanitized'] = frag_sanitized
        frag_content = down.read()
        down.close()
@@ -457,7 +457,7 @@ def download_and_append_fragments(

        def download_fragment(fragment, ctx):
            if not interrupt_trigger[0]:
-                return False, fragment['frag_index']
+                return

            frag_index = ctx['fragment_index'] = fragment['frag_index']
            ctx['last_error'] = None
@@ -467,14 +467,12 @@ def download_fragment(fragment, ctx):
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)

            # Never skip the first fragment
-            fatal = is_fatal(fragment.get('index') or (frag_index - 1))
-            count, frag_content = 0, None
+            fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
            while count <= fragment_retries:
                try:
-                    success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
-                    if not success:
-                        return False, frag_index
-                    break
+                    if self._download_fragment(ctx, fragment['url'], info_dict, headers):
+                        break
+                    return
                except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
                    # Unavailable (possibly temporary) fragments may be served.
                    # First we try to retry then either skip or abort.
@@ -491,13 +489,9 @@ def download_fragment(fragment, ctx):
                        break
                    raise

-            if count > fragment_retries:
-                if not fatal:
-                    return False, frag_index
+            if count > fragment_retries and fatal:
                ctx['dest_stream'].close()
                self.report_error('Giving up after %s fragment retries' % fragment_retries)
-                return False, frag_index
-            return frag_content, frag_index

        def append_fragment(frag_content, frag_index, ctx):
            if not frag_content:
@@ -520,23 +514,23 @@ def append_fragment(frag_content, frag_index, ctx):

            def _download_fragment(fragment):
                ctx_copy = ctx.copy()
-                frag_content, frag_index = download_fragment(fragment, ctx_copy)
-                return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+                download_fragment(fragment, ctx_copy)
+                return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')

            self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
            with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
-                for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+                for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
                    ctx['fragment_filename_sanitized'] = frag_filename
                    ctx['fragment_index'] = frag_index
-                    result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+                    result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
                    if not result:
                        return False
        else:
            for fragment in fragments:
                if not interrupt_trigger[0]:
                    break
-                frag_content, frag_index = download_fragment(fragment, ctx)
-                result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+                download_fragment(fragment, ctx)
+                result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
                if not result:
                    return False

--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -263,9 +263,11 @@ def real_download(self, filename, info_dict):
            count = 0
            while count <= fragment_retries:
                try:
-                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+                    success = self._download_fragment(ctx, segment['url'], info_dict)
                    if not success:
                        return False
+                    frag_content = self._read_fragment(ctx)
+
                    if not extra_state['ism_track_written']:
                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
--- a/yt_dlp/downloader/mhtml.py
+++ b/yt_dlp/downloader/mhtml.py
@@ -171,9 +171,10 @@ def real_download(self, filename, info_dict):
                assert fragment_base_url
                fragment_url = urljoin(fragment_base_url, fragment['path'])

-            success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+            success = self._download_fragment(ctx, fragment_url, info_dict)
            if not success:
                continue
+            frag_content = self._read_fragment(ctx)

            mime_type = b'image/jpeg'
            if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -115,9 +115,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
            count = 0
            while count <= fragment_retries:
                try:
-                    success, raw_fragment = dl_fragment(url, request_data, headers)
+                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
+                    raw_fragment = self._read_fragment(ctx)
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
@@ -145,9 +146,10 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None

        self._prepare_and_start_frag_download(ctx, info_dict)

-        success, raw_fragment = dl_fragment(info_dict['url'])
+        success = dl_fragment(info_dict['url'])
        if not success:
            return False
+        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError: