mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-03 06:01:02 +00:00
[YoutubeDL] Add generic video filtering (Fixes #4916)
This functionality is intended to eventually encompass the current format filtering.
This commit is contained in:
parent
8829650513
commit
347de4931c
5 changed files with 147 additions and 2 deletions
|
@ -53,6 +53,7 @@
|
||||||
version_tuple,
|
version_tuple,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
render_table,
|
render_table,
|
||||||
|
match_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -459,6 +460,37 @@ def test_render_table(self):
|
||||||
'123 4\n'
|
'123 4\n'
|
||||||
'9999 51')
|
'9999 51')
|
||||||
|
|
||||||
|
def test_match_str(self):
    """Exercise match_str with unary, comparison and combined filters."""
    # An ordering operator applied to a string value is rejected.
    self.assertRaises(ValueError, match_str, 'xy>foobar', {})

    combined = 'like_count > 100 & dislike_count <? 50 & description'
    # (filter string, dictionary to test, expected result)
    cases = [
        # Presence / absence of a key
        ('xy', {'x': 1200}, False),
        ('!xy', {'x': 1200}, True),
        ('x', {'x': 1200}, True),
        ('!x', {'x': 1200}, False),
        ('x', {'x': 0}, True),
        # Numeric comparisons, including missing values and the ? marker
        ('x>0', {'x': 0}, False),
        ('x>0', {}, False),
        ('x>?0', {}, True),
        ('x>1K', {'x': 1200}, True),
        ('x>2K', {'x': 1200}, False),
        ('x>=1200 & x < 1300', {'x': 1200}, True),
        ('x>=1100 & x < 1200', {'x': 1200}, False),
        # String equality / inequality
        ('y=a212', {'y': 'foobar42'}, False),
        ('y=foobar42', {'y': 'foobar42'}, True),
        ('y!=foobar42', {'y': 'foobar42'}, False),
        ('y!=foobar2', {'y': 'foobar42'}, True),
        # Several clauses joined with &
        (combined, {'like_count': 90, 'description': 'foo'}, False),
        (combined, {'like_count': 190, 'description': 'foo'}, True),
        (combined,
         {'like_count': 190, 'dislike_count': 60, 'description': 'foo'},
         False),
        (combined, {'like_count': 190, 'dislike_count': 10}, False),
    ]
    for filter_str, dct, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(match_str(filter_str, dct))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -228,6 +228,11 @@ class YoutubeDL(object):
|
||||||
external_downloader: Executable of the external downloader to call.
|
external_downloader: Executable of the external downloader to call.
|
||||||
listformats: Print an overview of available video formats and exit.
|
listformats: Print an overview of available video formats and exit.
|
||||||
list_thumbnails: Print a table of all thumbnails and exit.
|
list_thumbnails: Print a table of all thumbnails and exit.
|
||||||
|
match_filter: A function that gets called with the info_dict of
|
||||||
|
every video.
|
||||||
|
If it returns a message, the video is ignored.
|
||||||
|
If it returns None, the video is downloaded.
|
||||||
|
match_filter_func in utils.py is one example for this.
|
||||||
|
|
||||||
|
|
||||||
The following parameters are not used by YoutubeDL itself, they are used by
|
The following parameters are not used by YoutubeDL itself, they are used by
|
||||||
|
@ -583,9 +588,16 @@ def _match_entry(self, info_dict):
|
||||||
if max_views is not None and view_count > max_views:
|
if max_views is not None and view_count > max_views:
|
||||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||||
return 'Skipping "%s" because it is age restricted' % title
|
return 'Skipping "%s" because it is age restricted' % video_title
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return '%s has already been recorded in archive' % video_title
|
return '%s has already been recorded in archive' % video_title
|
||||||
|
|
||||||
|
match_filter = self.params.get('match_filter')
|
||||||
|
if match_filter is not None:
|
||||||
|
ret = match_filter(info_dict)
|
||||||
|
if ret is not None:
|
||||||
|
return ret
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -23,9 +23,10 @@
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
DateRange,
|
DateRange,
|
||||||
DEFAULT_OUTTMPL,
|
|
||||||
decodeOption,
|
decodeOption,
|
||||||
|
DEFAULT_OUTTMPL,
|
||||||
DownloadError,
|
DownloadError,
|
||||||
|
match_filter_func,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
read_batch_urls,
|
read_batch_urls,
|
||||||
|
@ -247,6 +248,9 @@ def _real_main(argv=None):
|
||||||
xattr # Confuse flake8
|
xattr # Confuse flake8
|
||||||
except ImportError:
|
except ImportError:
|
||||||
parser.error('setting filesize xattr requested but python-xattr is not available')
|
parser.error('setting filesize xattr requested but python-xattr is not available')
|
||||||
|
match_filter = (
|
||||||
|
None if opts.match_filter is None
|
||||||
|
else match_filter_func(opts.match_filter))
|
||||||
|
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'usenetrc': opts.usenetrc,
|
'usenetrc': opts.usenetrc,
|
||||||
|
@ -344,6 +348,7 @@ def _real_main(argv=None):
|
||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
'xattr_set_filesize': opts.xattr_set_filesize,
|
'xattr_set_filesize': opts.xattr_set_filesize,
|
||||||
|
'match_filter': match_filter,
|
||||||
}
|
}
|
||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
|
|
|
@ -244,6 +244,25 @@ def _hide_login_info(opts):
|
||||||
'--max-views',
|
'--max-views',
|
||||||
metavar='COUNT', dest='max_views', default=None, type=int,
|
metavar='COUNT', dest='max_views', default=None, type=int,
|
||||||
help='Do not download any videos with more than COUNT views')
|
help='Do not download any videos with more than COUNT views')
|
||||||
|
# --match-filter: generic per-video filter expression. The adjacent string
# literals below are concatenated into one help text, so every fragment that
# ends a phrase needs its own trailing space.
selection.add_option(
    '--match-filter',
    metavar='FILTER', dest='match_filter', default=None,
    help=(
        '(Experimental) Generic video filter. '
        'Specify any key (see help for -o for a list of available keys) to'
        ' match if the key is present, '
        '!key to check if the key is not present, '
        'key > NUMBER (like "comment_count > 12", also works with '
        '>=, <, <=, !=, =) to compare against a number, and '
        '& to require multiple matches. '
        'Values which are not known are excluded unless you'
        ' put a question mark (?) after the operator. '
        'For example, to only match videos that have been liked more than '
        '100 times and disliked less than 50 times (or the dislike '
        'functionality is not available at the given service), but who '
        'also have a description, use --match-filter '
        '"like_count > 100 & dislike_count <? 50 & description" .'
    ))
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--no-playlist',
|
'--no-playlist',
|
||||||
action='store_true', dest='noplaylist', default=False,
|
action='store_true', dest='noplaylist', default=False,
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
import math
|
import math
|
||||||
|
import operator
|
||||||
import os
|
import os
|
||||||
import pipes
|
import pipes
|
||||||
import platform
|
import platform
|
||||||
|
@ -1678,3 +1679,79 @@ def render_table(header_row, data):
|
||||||
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
|
max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
|
||||||
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
|
format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
|
||||||
return '\n'.join(format_str % tuple(row) for row in table)
|
return '\n'.join(format_str % tuple(row) for row in table)
|
||||||
|
|
||||||
|
|
||||||
|
def _match_one(filter_part, dct):
|
||||||
|
COMPARISON_OPERATORS = {
|
||||||
|
'<': operator.lt,
|
||||||
|
'<=': operator.le,
|
||||||
|
'>': operator.gt,
|
||||||
|
'>=': operator.ge,
|
||||||
|
'=': operator.eq,
|
||||||
|
'!=': operator.ne,
|
||||||
|
}
|
||||||
|
operator_rex = re.compile(r'''(?x)\s*
|
||||||
|
(?P<key>[a-z_]+)
|
||||||
|
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
|
(?:
|
||||||
|
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
|
||||||
|
(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
|
||||||
|
)
|
||||||
|
\s*$
|
||||||
|
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
|
||||||
|
m = operator_rex.search(filter_part)
|
||||||
|
if m:
|
||||||
|
op = COMPARISON_OPERATORS[m.group('op')]
|
||||||
|
if m.group('strval') is not None:
|
||||||
|
if m.group('op') not in ('=', '!='):
|
||||||
|
raise ValueError(
|
||||||
|
'Operator %s does not support string values!' % m.group('op'))
|
||||||
|
comparison_value = m.group('strval')
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
comparison_value = int(m.group('intval'))
|
||||||
|
except ValueError:
|
||||||
|
comparison_value = parse_filesize(m.group('intval'))
|
||||||
|
if comparison_value is None:
|
||||||
|
comparison_value = parse_filesize(m.group('intval') + 'B')
|
||||||
|
if comparison_value is None:
|
||||||
|
raise ValueError(
|
||||||
|
'Invalid integer value %r in filter part %r' % (
|
||||||
|
m.group('intval'), filter_part))
|
||||||
|
actual_value = dct.get(m.group('key'))
|
||||||
|
if actual_value is None:
|
||||||
|
return m.group('none_inclusive')
|
||||||
|
return op(actual_value, comparison_value)
|
||||||
|
|
||||||
|
UNARY_OPERATORS = {
|
||||||
|
'': lambda v: v is not None,
|
||||||
|
'!': lambda v: v is None,
|
||||||
|
}
|
||||||
|
operator_rex = re.compile(r'''(?x)\s*
|
||||||
|
(?P<op>%s)\s*(?P<key>[a-z_]+)
|
||||||
|
\s*$
|
||||||
|
''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
|
||||||
|
m = operator_rex.search(filter_part)
|
||||||
|
if m:
|
||||||
|
op = UNARY_OPERATORS[m.group('op')]
|
||||||
|
actual_value = dct.get(m.group('key'))
|
||||||
|
return op(actual_value)
|
||||||
|
|
||||||
|
raise ValueError('Invalid filter part %r' % filter_part)
|
||||||
|
|
||||||
|
|
||||||
|
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax.

    filter_str is split on '&' and every resulting part must match dct.
    Returns True (= passes the filter) or False.
    """
    for filter_part in filter_str.split('&'):
        # Stop at the first failing part; later parts are not evaluated.
        if not _match_one(filter_part, dct):
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def match_filter_func(filter_str):
    """Build a filter callback from a filter string.

    The returned function takes an info_dict and returns None when the
    dictionary passes filter_str, or a message naming the video (its
    title, else its id, else 'video') when it does not.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
|
||||||
|
|
Loading…
Reference in a new issue