mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-22 02:15:12 +00:00
Ability to load playlist infojson
* If `--no-clean-infojson` is given, the video ids are saved/loaded from in the infojson along with their playlist index * If a video entry that was not saved is requested, we fallback to using `webpage_url` to re-extract the entries Related: https://github.com/yt-dlp/yt-dlp/issues/190#issuecomment-804921024
This commit is contained in:
parent
394dcd4486
commit
498f560638
2 changed files with 84 additions and 63 deletions
|
@ -60,6 +60,7 @@
|
|||
encode_compat_str,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
EntryNotInPlaylist,
|
||||
ExistingVideoReached,
|
||||
expand_path,
|
||||
ExtractorError,
|
||||
|
@ -1180,48 +1181,16 @@ def __process_playlist(self, ie_result, download):
|
|||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
if self.params.get('allow_playlist_files', True):
|
||||
ie_copy = {
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': 0
|
||||
}
|
||||
ie_copy.update(dict(ie_result))
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Playlist metadata is already present')
|
||||
else:
|
||||
playlist_info = dict(ie_result)
|
||||
# playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
|
||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
||||
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Playlist description is already present')
|
||||
elif ie_result.get('description') is None:
|
||||
self.report_warning('There\'s no playlist description to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(ie_result['description'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist description file ' + descfn)
|
||||
return
|
||||
if 'entries' not in ie_result:
|
||||
raise EntryNotInPlaylist()
|
||||
incomplete_entries = bool(ie_result.get('requested_entries'))
|
||||
if incomplete_entries:
|
||||
def fill_missing_entries(entries, indexes):
|
||||
ret = [None] * max(*indexes)
|
||||
for i, entry in zip(indexes, entries):
|
||||
ret[i - 1] = entry
|
||||
return ret
|
||||
ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
|
||||
|
||||
playlist_results = []
|
||||
|
||||
|
@ -1248,25 +1217,20 @@ def iter_playlistitems(format):
|
|||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
for i in playlistitems:
|
||||
if -num_entries < i <= num_entries:
|
||||
yield list_ie_entries[i - 1]
|
||||
elif incomplete_entries:
|
||||
raise EntryNotInPlaylist()
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
entries = list(make_playlistitems_entries(ie_entries))
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
|
@ -1278,25 +1242,73 @@ def report_download(num_entries):
|
|||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
msg = 'Downloading %d videos' % n_entries
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
entries = list(make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems)))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
msg = 'Downloading %d videos' % n_entries
|
||||
|
||||
if any((entry is None for entry in entries)):
|
||||
raise EntryNotInPlaylist()
|
||||
if not playlistitems and (playliststart or playlistend):
|
||||
playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
|
||||
ie_result['entries'] = entries
|
||||
ie_result['requested_entries'] = playlistitems
|
||||
|
||||
if self.params.get('allow_playlist_files', True):
|
||||
ie_copy = {
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': 0
|
||||
}
|
||||
ie_copy.update(dict(ie_result))
|
||||
|
||||
if self.params.get('writeinfojson', False):
|
||||
infofn = self.prepare_filename(ie_copy, 'pl_infojson')
|
||||
if not self._ensure_dir_exists(encodeFilename(infofn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
|
||||
self.to_screen('[info] Playlist metadata is already present')
|
||||
else:
|
||||
self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
|
||||
try:
|
||||
write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
|
||||
|
||||
if self.params.get('writedescription', False):
|
||||
descfn = self.prepare_filename(ie_copy, 'pl_description')
|
||||
if not self._ensure_dir_exists(encodeFilename(descfn)):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
|
||||
self.to_screen('[info] Playlist description is already present')
|
||||
elif ie_result.get('description') is None:
|
||||
self.report_warning('There\'s no playlist description to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing playlist description to: ' + descfn)
|
||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(ie_result['description'])
|
||||
except (OSError, IOError):
|
||||
self.report_error('Cannot write playlist description file ' + descfn)
|
||||
return
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
|
@ -1310,7 +1322,7 @@ def report_download(num_entries):
|
|||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
|
@ -2524,10 +2536,10 @@ def download_with_info_file(self, info_filename):
|
|||
[info_filename], mode='r',
|
||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
||||
# FileInput doesn't have a read method, we can't call json.load
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
||||
info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
|
||||
try:
|
||||
self.process_ie_result(info, download=True)
|
||||
except DownloadError:
|
||||
except (DownloadError, EntryNotInPlaylist):
|
||||
webpage_url = info.get('webpage_url')
|
||||
if webpage_url is not None:
|
||||
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
|
||||
|
@ -2542,7 +2554,7 @@ def filter_requested_info(info_dict, actually_filter=True):
|
|||
info_dict['epoch'] = int(time.time())
|
||||
return info_dict
|
||||
exceptions = {
|
||||
'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'],
|
||||
'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
|
||||
'keep': ['_type'],
|
||||
}
|
||||
keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
|
||||
|
|
|
@ -2423,6 +2423,15 @@ def __init__(self, msg, exc_info=None):
|
|||
self.exc_info = exc_info
|
||||
|
||||
|
||||
class EntryNotInPlaylist(YoutubeDLError):
|
||||
"""Entry not in playlist exception.
|
||||
|
||||
This exception will be thrown by YoutubeDL when a requested entry
|
||||
is not found in the playlist info_dict
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class SameFileError(YoutubeDLError):
|
||||
"""Same File exception.
|
||||
|
||||
|
|
Loading…
Reference in a new issue