mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-22 06:00:00 +00:00
Add option --download-sections
to download video partially
Closes #52, Closes #3932
This commit is contained in:
parent
e0ab98541c
commit
5ec1b6b716
7 changed files with 123 additions and 55 deletions
28
README.md
28
README.md
|
@ -93,6 +93,8 @@ # NEW FEATURES
|
|||
|
||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]`
|
||||
|
||||
* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`
|
||||
|
||||
* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`
|
||||
|
||||
* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel. Use `--concurrent-fragments` (`-N`) option to set the number of threads used
|
||||
|
@ -555,6 +557,14 @@ ## Download Options:
|
|||
--no-hls-use-mpegts Do not use the mpegts container for HLS
|
||||
videos. This is default when not downloading
|
||||
live streams
|
||||
--download-sections REGEX Download only chapters whose title matches
|
||||
the given regular expression. Time ranges
|
||||
prefixed by a "*" can also be used in place
|
||||
of chapters to download the specified range.
|
||||
Eg: --download-sections "*10:15-15:00"
|
||||
--download-sections "intro". Needs ffmpeg.
|
||||
This option can be used multiple times to
|
||||
download multiple sections
|
||||
--downloader [PROTO:]NAME Name or path of the external downloader to
|
||||
use (optionally) prefixed by the protocols
|
||||
(http, ftp, m3u8, dash, rtsp, rtmp, mms) to
|
||||
|
@ -997,18 +1007,16 @@ ## Post-Processing Options:
|
|||
--no-split-chapters Do not split video based on chapters
|
||||
(default)
|
||||
--remove-chapters REGEX Remove chapters whose title matches the
|
||||
given regular expression. Time ranges
|
||||
prefixed by a "*" can also be used in place
|
||||
of chapters to remove the specified range.
|
||||
Eg: --remove-chapters "*10:15-15:00"
|
||||
--remove-chapters "intro". This option can
|
||||
given regular expression. The syntax is the
|
||||
same as --download-sections. This option can
|
||||
be used multiple times
|
||||
--no-remove-chapters Do not remove any chapters from the file
|
||||
(default)
|
||||
--force-keyframes-at-cuts Force keyframes around chapters when
|
||||
removing/splitting them. This is slow due to
|
||||
needing a re-encode, but the resulting video
|
||||
may have fewer artifacts around the cuts
|
||||
--force-keyframes-at-cuts Force keyframes at cuts when
|
||||
downloading/splitting/removing sections.
|
||||
This is slow due to needing a re-encode, but
|
||||
the resulting video may have fewer artifacts
|
||||
around the cuts
|
||||
--no-force-keyframes-at-cuts Do not force keyframes around the chapters
|
||||
when cutting/splitting (default)
|
||||
--use-postprocessor NAME[:ARGS]
|
||||
|
@ -1286,7 +1294,7 @@ # OUTPUT TEMPLATE
|
|||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Available for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
|
||||
- `section_title` (string): Title of the chapter
|
||||
- `section_number` (numeric): Number of the chapter within the file
|
||||
|
|
|
@ -417,8 +417,6 @@ class YoutubeDL:
|
|||
geo_bypass_ip_block:
|
||||
IP range in CIDR notation that will be used similarly to
|
||||
geo_bypass_country
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: A dictionary of protocol keys and the executable of the
|
||||
external downloader to use for it. The allowed protocols
|
||||
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
|
||||
|
@ -435,6 +433,13 @@ class YoutubeDL:
|
|||
retry_sleep_functions: Dictionary of functions that takes the number of attempts
|
||||
as argument and returns the time to sleep in seconds.
|
||||
Allowed keys are 'http', 'fragment', 'file_access'
|
||||
download_ranges: A function that gets called for every video with the signature
|
||||
(info_dict, ydl) -> Iterable[Section].
|
||||
Only the returned sections will be downloaded. Each Section contains:
|
||||
* start_time: Start time of the section in seconds
|
||||
* end_time: End time of the section in seconds
|
||||
* title: Section title (Optional)
|
||||
* index: Section number (Optional)
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the downloader (see yt_dlp/downloader/common.py):
|
||||
|
@ -2653,16 +2658,34 @@ def is_wellformed(f):
|
|||
# Process what we can, even without any available formats.
|
||||
formats_to_download = [{}]
|
||||
|
||||
best_format = formats_to_download[-1]
|
||||
requested_ranges = self.params.get('download_ranges')
|
||||
if requested_ranges:
|
||||
requested_ranges = tuple(requested_ranges(info_dict, self))
|
||||
|
||||
best_format, downloaded_formats = formats_to_download[-1], []
|
||||
if download:
|
||||
if best_format:
|
||||
self.to_screen(
|
||||
f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
|
||||
+ ', '.join([f['format_id'] for f in formats_to_download]))
|
||||
def to_screen(*msg):
|
||||
self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
|
||||
|
||||
to_screen(f'Downloading {len(formats_to_download)} format(s):',
|
||||
(f['format_id'] for f in formats_to_download))
|
||||
if requested_ranges:
|
||||
to_screen(f'Downloading {len(requested_ranges)} time ranges:',
|
||||
(f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
|
||||
max_downloads_reached = False
|
||||
for i, fmt in enumerate(formats_to_download):
|
||||
formats_to_download[i] = new_info = self._copy_infodict(info_dict)
|
||||
|
||||
for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
|
||||
new_info = self._copy_infodict(info_dict)
|
||||
new_info.update(fmt)
|
||||
if chapter:
|
||||
new_info.update({
|
||||
'section_start': chapter.get('start_time'),
|
||||
'section_end': chapter.get('end_time', 0),
|
||||
'section_title': chapter.get('title'),
|
||||
'section_number': chapter.get('index'),
|
||||
})
|
||||
downloaded_formats.append(new_info)
|
||||
try:
|
||||
self.process_info(new_info)
|
||||
except MaxDownloadsReached:
|
||||
|
@ -2675,12 +2698,12 @@ def is_wellformed(f):
|
|||
if max_downloads_reached:
|
||||
break
|
||||
|
||||
write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
|
||||
write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
|
||||
assert write_archive.issubset({True, False, 'ignore'})
|
||||
if True in write_archive and False not in write_archive:
|
||||
self.record_download_archive(info_dict)
|
||||
|
||||
info_dict['requested_downloads'] = formats_to_download
|
||||
info_dict['requested_downloads'] = downloaded_formats
|
||||
info_dict = self.run_all_pps('after_video', info_dict)
|
||||
if max_downloads_reached:
|
||||
raise MaxDownloadsReached()
|
||||
|
@ -3036,6 +3059,17 @@ def existing_video_file(*filepaths):
|
|||
return file
|
||||
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self)
|
||||
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
|
||||
if fd is not FFmpegFD and (
|
||||
info_dict.get('section_start') or info_dict.get('section_end')):
|
||||
msg = ('This format cannot be partially downloaded' if merger.available
|
||||
else 'You have requested downloading the video partially, but ffmpeg is not installed')
|
||||
if not self.params.get('ignoreerrors'):
|
||||
self.report_error(f'{msg}. Aborting due to --abort-on-error')
|
||||
return
|
||||
self.report_warning(f'{msg}. The entire video will be downloaded')
|
||||
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
|
||||
def compatible_formats(formats):
|
||||
|
@ -3091,9 +3125,6 @@ def correct_ext(filename, ext=new_ext):
|
|||
info_dict['__real_download'] = False
|
||||
|
||||
downloaded = []
|
||||
merger = FFmpegMergerPP(self)
|
||||
|
||||
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
|
||||
if dl_filename is not None:
|
||||
self.report_file_already_downloaded(dl_filename)
|
||||
elif fd:
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
GeoUtils,
|
||||
SameFileError,
|
||||
decodeOption,
|
||||
download_range_func,
|
||||
expand_path,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
|
@ -305,20 +306,25 @@ def validate_outtmpl(tmpl, msg):
|
|||
'Cannot download a video and extract audio into the same file! '
|
||||
f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
|
||||
|
||||
# Remove chapters
|
||||
remove_chapters_patterns, opts.remove_ranges = [], []
|
||||
for regex in opts.remove_chapters or []:
|
||||
if regex.startswith('*'):
|
||||
dur = list(map(parse_duration, regex[1:].split('-')))
|
||||
if len(dur) == 2 and all(t is not None for t in dur):
|
||||
opts.remove_ranges.append(tuple(dur))
|
||||
def parse_chapters(name, value):
|
||||
chapters, ranges = [], []
|
||||
for regex in value or []:
|
||||
if regex.startswith('*'):
|
||||
for range in regex[1:].split(','):
|
||||
dur = tuple(map(parse_duration, range.strip().split('-')))
|
||||
if len(dur) == 2 and all(t is not None for t in dur):
|
||||
ranges.append(dur)
|
||||
else:
|
||||
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end')
|
||||
continue
|
||||
raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end')
|
||||
try:
|
||||
remove_chapters_patterns.append(re.compile(regex))
|
||||
except re.error as err:
|
||||
raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}')
|
||||
opts.remove_chapters = remove_chapters_patterns
|
||||
try:
|
||||
chapters.append(re.compile(regex))
|
||||
except re.error as err:
|
||||
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
|
||||
return chapters, ranges
|
||||
|
||||
opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
|
||||
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
|
||||
|
||||
# Cookies from browser
|
||||
if opts.cookiesfrombrowser:
|
||||
|
@ -803,6 +809,8 @@ def parse_options(argv=None):
|
|||
'max_sleep_interval': opts.max_sleep_interval,
|
||||
'sleep_interval_subtitles': opts.sleep_interval_subtitles,
|
||||
'external_downloader': opts.external_downloader,
|
||||
'download_ranges': opts.download_ranges,
|
||||
'force_keyframes_at_cuts': opts.force_keyframes_at_cuts,
|
||||
'list_thumbnails': opts.list_thumbnails,
|
||||
'playlist_items': opts.playlist_items,
|
||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||
|
|
|
@ -84,8 +84,8 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
|
|||
if default is NO_DEFAULT:
|
||||
default = HttpFD
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
|
||||
return FFmpegFD
|
||||
|
||||
info_dict['protocol'] = protocol
|
||||
downloaders = params.get('external_downloader')
|
||||
|
|
|
@ -384,13 +384,6 @@ def _call_downloader(self, tmpfilename, info_dict):
|
|||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||
args += ['-seekable', '1' if seekable else '0']
|
||||
|
||||
# start_time = info_dict.get('start_time') or 0
|
||||
# if start_time:
|
||||
# args += ['-ss', str(start_time)]
|
||||
# end_time = info_dict.get('end_time')
|
||||
# if end_time:
|
||||
# args += ['-t', str(end_time - start_time)]
|
||||
|
||||
http_headers = None
|
||||
if info_dict.get('http_headers'):
|
||||
youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
|
@ -451,15 +444,21 @@ def _call_downloader(self, tmpfilename, info_dict):
|
|||
elif isinstance(conn, str):
|
||||
args += ['-rtmp_conn', conn]
|
||||
|
||||
start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
|
||||
|
||||
for i, url in enumerate(urls):
|
||||
# We need to specify headers for each http input stream
|
||||
# otherwise, it will only be applied to the first.
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/2696
|
||||
if http_headers is not None and re.match(r'^https?://', url):
|
||||
args += http_headers
|
||||
if start_time:
|
||||
args += ['-ss', str(start_time)]
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
|
||||
|
||||
args += ['-c', 'copy']
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
args += ['-c', 'copy']
|
||||
|
||||
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
|
||||
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
|
||||
stream_number = fmt.get('manifest_stream_number', 0)
|
||||
|
|
|
@ -916,6 +916,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
help=(
|
||||
'Do not use the mpegts container for HLS videos. '
|
||||
'This is default when not downloading live streams'))
|
||||
downloader.add_option(
|
||||
'--download-sections',
|
||||
metavar='REGEX', dest='download_ranges', action='append',
|
||||
help=(
|
||||
'Download only chapters whose title matches the given regular expression. '
|
||||
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
|
||||
'Eg: --download-sections "*10:15-15:00" --download-sections "intro". '
|
||||
'Needs ffmpeg. This option can be used multiple times to download multiple sections'))
|
||||
downloader.add_option(
|
||||
'--downloader', '--external-downloader',
|
||||
dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
|
||||
|
@ -1631,9 +1639,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
metavar='REGEX', dest='remove_chapters', action='append',
|
||||
help=(
|
||||
'Remove chapters whose title matches the given regular expression. '
|
||||
'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. '
|
||||
'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". '
|
||||
'This option can be used multiple times'))
|
||||
'The syntax is the same as --download-sections. This option can be used multiple times'))
|
||||
postproc.add_option(
|
||||
'--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
|
||||
help='Do not remove any chapters from the file (default)')
|
||||
|
@ -1641,9 +1647,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
'--force-keyframes-at-cuts',
|
||||
action='store_true', dest='force_keyframes_at_cuts', default=False,
|
||||
help=(
|
||||
'Force keyframes around chapters when removing/splitting them. '
|
||||
'This is slow due to needing a re-encode, but '
|
||||
'the resulting video may have fewer artifacts around the cuts'))
|
||||
'Force keyframes at cuts when downloading/splitting/removing sections. '
|
||||
'This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts'))
|
||||
postproc.add_option(
|
||||
'--no-force-keyframes-at-cuts',
|
||||
action='store_false', dest='force_keyframes_at_cuts',
|
||||
|
|
|
@ -3495,6 +3495,23 @@ def _match_func(info_dict, incomplete=False):
|
|||
return _match_func
|
||||
|
||||
|
||||
def download_range_func(chapters, ranges):
    """Build a callback that selects which sections of a video to download.

    @param chapters  Iterable of regexes (compiled or str) matched against
                     chapter titles with re.search; may be empty or None
    @param ranges    Iterable of (start, end) pairs in seconds; may be
                     empty or None
    @returns         A generator function inner(info_dict, ydl) yielding one
                     dict per selected section: every matching chapter
                     (a copy of the chapter dict plus its 0-based 'index'),
                     followed by {'start_time', 'end_time'} for each range
    """
    def inner(info_dict, ydl):
        video_chapters = info_dict.get('chapters') or []
        matched = False
        for regex in chapters or []:
            for i, chapter in enumerate(video_chapters):
                if re.search(regex, chapter['title']):
                    matched = True
                    yield {**chapter, 'index': i}

        # Only complain about chapters when the user actually asked for some.
        # Otherwise a ranges-only (or completely empty) request would print a
        # misleading chapter warning for every single video.
        if chapters and not matched:
            warning = ('There are no chapters matching the regex' if video_chapters
                       else 'Chapter information is unavailable')
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])

    return inner
|
||||
|
||||
|
||||
def parse_dfxp_time_expr(time_expr):
|
||||
if not time_expr:
|
||||
return
|
||||
|
|
Loading…
Reference in a new issue