mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-22 06:00:00 +00:00
Improve --download-sections
* Support negative time-ranges
* Add `*from-url` to obey time-ranges in URL

Closes #7248
This commit is contained in:
parent
71dc18fa29
commit
b4e0d75848
5 changed files with 74 additions and 32 deletions
14
README.md
14
README.md
|
@ -610,12 +610,14 @@ ## Download Options:
|
||||||
--no-hls-use-mpegts Do not use the mpegts container for HLS
|
--no-hls-use-mpegts Do not use the mpegts container for HLS
|
||||||
videos. This is default when not downloading
|
videos. This is default when not downloading
|
||||||
live streams
|
live streams
|
||||||
--download-sections REGEX Download only chapters whose title matches
|
--download-sections REGEX Download only chapters that match the
|
||||||
the given regular expression. Time ranges
|
regular expression. A "*" prefix denotes
|
||||||
prefixed by a "*" can also be used in place
|
time-range instead of chapter. Negative
|
||||||
of chapters to download the specified range.
|
timestamps are calculated from the end.
|
||||||
Needs ffmpeg. This option can be used
|
"*from-url" can be used to download between
|
||||||
multiple times to download multiple
|
the "start_time" and "end_time" extracted
|
||||||
|
from the URL. Needs ffmpeg. This option can
|
||||||
|
be used multiple times to download multiple
|
||||||
sections, e.g. --download-sections
|
sections, e.g. --download-sections
|
||||||
"*10:15-inf" --download-sections "intro"
|
"*10:15-inf" --download-sections "intro"
|
||||||
--downloader [PROTO:]NAME Name or path of the external downloader to
|
--downloader [PROTO:]NAME Name or path of the external downloader to
|
||||||
|
|
|
@ -2806,11 +2806,13 @@ def to_screen(*msg):
|
||||||
new_info.update(fmt)
|
new_info.update(fmt)
|
||||||
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
|
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
|
||||||
end_time = offset + min(chapter.get('end_time', duration), duration)
|
end_time = offset + min(chapter.get('end_time', duration), duration)
|
||||||
|
# duration may not be accurate. So allow deviations <1sec
|
||||||
|
if end_time == float('inf') or end_time > offset + duration + 1:
|
||||||
|
end_time = None
|
||||||
if chapter or offset:
|
if chapter or offset:
|
||||||
new_info.update({
|
new_info.update({
|
||||||
'section_start': offset + chapter.get('start_time', 0),
|
'section_start': offset + chapter.get('start_time', 0),
|
||||||
# duration may not be accurate. So allow deviations <1sec
|
'section_end': end_time,
|
||||||
'section_end': end_time if end_time <= offset + duration + 1 else None,
|
|
||||||
'section_title': chapter.get('title'),
|
'section_title': chapter.get('title'),
|
||||||
'section_number': chapter.get('index'),
|
'section_number': chapter.get('index'),
|
||||||
})
|
})
|
||||||
|
|
|
@ -320,26 +320,49 @@ def validate_outtmpl(tmpl, msg):
|
||||||
opts.skip_download = None
|
opts.skip_download = None
|
||||||
del opts.outtmpl['default']
|
del opts.outtmpl['default']
|
||||||
|
|
||||||
def parse_chapters(name, value):
|
def parse_chapters(name, value, advanced=False):
|
||||||
chapters, ranges = [], []
|
|
||||||
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
|
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
|
||||||
|
TIMESTAMP_RE = r'''(?x)(?:
|
||||||
|
(?P<start_sign>-?)(?P<start>[^-]+)
|
||||||
|
)?\s*-\s*(?:
|
||||||
|
(?P<end_sign>-?)(?P<end>[^-]+)
|
||||||
|
)?'''
|
||||||
|
|
||||||
|
chapters, ranges, from_url = [], [], False
|
||||||
for regex in value or []:
|
for regex in value or []:
|
||||||
if regex.startswith('*'):
|
if advanced and regex == '*from-url':
|
||||||
for range_ in map(str.strip, regex[1:].split(',')):
|
from_url = True
|
||||||
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
|
|
||||||
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
|
|
||||||
if None in (dur or [None]):
|
|
||||||
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
|
|
||||||
ranges.append(dur)
|
|
||||||
continue
|
continue
|
||||||
|
elif not regex.startswith('*'):
|
||||||
try:
|
try:
|
||||||
chapters.append(re.compile(regex))
|
chapters.append(re.compile(regex))
|
||||||
except re.error as err:
|
except re.error as err:
|
||||||
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
|
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
|
||||||
return chapters, ranges
|
continue
|
||||||
|
|
||||||
opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
|
for range_ in map(str.strip, regex[1:].split(',')):
|
||||||
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
|
mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
|
||||||
|
dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
|
||||||
|
signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
|
||||||
|
|
||||||
|
err = None
|
||||||
|
if None in (dur or [None]):
|
||||||
|
err = 'Must be of the form "*start-end"'
|
||||||
|
elif not advanced and any(signs):
|
||||||
|
err = 'Negative timestamps are not allowed'
|
||||||
|
else:
|
||||||
|
dur[0] *= -1 if signs[0] else 1
|
||||||
|
dur[1] *= -1 if signs[1] else 1
|
||||||
|
if dur[1] == float('-inf'):
|
||||||
|
err = '"-inf" is not a valid end'
|
||||||
|
if err:
|
||||||
|
raise ValueError(f'invalid {name} time range "{regex}". {err}')
|
||||||
|
ranges.append(dur)
|
||||||
|
|
||||||
|
return chapters, ranges, from_url
|
||||||
|
|
||||||
|
opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
|
||||||
|
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
|
||||||
|
|
||||||
# Cookies from browser
|
# Cookies from browser
|
||||||
if opts.cookiesfrombrowser:
|
if opts.cookiesfrombrowser:
|
||||||
|
|
|
@ -1012,8 +1012,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||||
'--download-sections',
|
'--download-sections',
|
||||||
metavar='REGEX', dest='download_ranges', action='append',
|
metavar='REGEX', dest='download_ranges', action='append',
|
||||||
help=(
|
help=(
|
||||||
'Download only chapters whose title matches the given regular expression. '
|
'Download only chapters that match the regular expression. '
|
||||||
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
|
'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. '
|
||||||
|
'"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
|
||||||
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
|
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
|
||||||
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
|
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
|
||||||
downloader.add_option(
|
downloader.add_option(
|
||||||
|
|
|
@ -3753,11 +3753,11 @@ def _match_func(info_dict, incomplete=False):
|
||||||
|
|
||||||
|
|
||||||
class download_range_func:
|
class download_range_func:
|
||||||
def __init__(self, chapters, ranges):
|
def __init__(self, chapters, ranges, from_info=False):
|
||||||
self.chapters, self.ranges = chapters, ranges
|
self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
|
||||||
|
|
||||||
def __call__(self, info_dict, ydl):
|
def __call__(self, info_dict, ydl):
|
||||||
if not self.ranges and not self.chapters:
|
if not any((self.ranges, self.chapters, self.from_info)):
|
||||||
yield {}
|
yield {}
|
||||||
|
|
||||||
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
|
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
|
||||||
|
@ -3770,7 +3770,21 @@ def __call__(self, info_dict, ydl):
|
||||||
if self.chapters and warning:
|
if self.chapters and warning:
|
||||||
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
|
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
|
||||||
|
|
||||||
yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
|
for start, end in self.ranges or []:
|
||||||
|
yield {
|
||||||
|
'start_time': self._handle_negative_timestamp(start, info_dict),
|
||||||
|
'end_time': self._handle_negative_timestamp(end, info_dict),
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
|
||||||
|
yield {
|
||||||
|
'start_time': info_dict.get('start_time'),
|
||||||
|
'end_time': info_dict.get('end_time'),
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _handle_negative_timestamp(time, info):
|
||||||
|
return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return (isinstance(other, download_range_func)
|
return (isinstance(other, download_range_func)
|
||||||
|
|
Loading…
Reference in a new issue