From 083c9df93b08a24e967b68fbdd2f4a71ae74c8c8 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 23 Jan 2015 00:04:05 +0100
Subject: [PATCH] [YoutubeDL] Allow filtering by properties (Fixes #4584)

---
 test/test_YoutubeDL.py  | 55 +++++++++++++++++++++++++++++++++++++++++
 youtube_dl/YoutubeDL.py | 54 ++++++++++++++++++++++++++++++++++++++++
 youtube_dl/options.py   | 11 +++++++++
 3 files changed, 120 insertions(+)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 85d87f2c3..678b9f7d1 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -281,6 +281,61 @@ def test_youtube_format_selection(self):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], f1id)
 
+    def test_format_filtering(self):
+        formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for f in formats:
+            f['url'] = 'http://_/'
+            f['ext'] = 'unknown'
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[filesize<3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'D')
+
+        ydl = YDL({'format': 'best[filesize<=3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'F')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'B')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'C')
+
+        ydl = YDL({'format': '[filesize>?1]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
+        ydl = YDL({'format': '[filesize<1M]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': '[filesize<1MiB]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 772fddd45..8ef74e414 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -10,6 +10,7 @@
 import itertools
 import json
 import locale
+import operator
 import os
 import platform
 import re
@@ -49,6 +50,7 @@
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
+    parse_filesize,
     PostProcessingError,
     platform_name,
     preferredencoding,
@@ -768,7 +770,59 @@ def _fixup(r):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _apply_format_filter(self, format_spec, available_formats):
+        " Returns a tuple of the remaining format_spec and filtered formats "
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        operator_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.search(format_spec)
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        try:
+            comparison_value = int(m.group('value'))
+        except ValueError:
+            comparison_value = parse_filesize(m.group('value'))
+            if comparison_value is None:
+                comparison_value = parse_filesize(m.group('value') + 'B')
+            if comparison_value is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        m.group('value'), format_spec))
+        op = OPERATORS[m.group('op')]
+
+        def _filter(f):
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        new_formats = [f for f in available_formats if _filter(f)]
+
+        new_format_spec = format_spec[:-len(m.group(0))]
+        if not new_format_spec:
+            new_format_spec = 'best'
+
+        return (new_format_spec, new_formats)
+
     def select_format(self, format_spec, available_formats):
+        while format_spec.endswith(']'):
+            format_spec, available_formats = self._apply_format_filter(
+                format_spec, available_formats)
+            if not available_formats:
+                return None
+
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f25c12e52..fd7b400b2 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -289,6 +289,17 @@ def _hide_login_info(opts):
             'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
             'You can also use the special names "best",'
             ' "bestvideo", "bestaudio", "worst". '
+            ' You can filter the video results by putting a condition in'
+            ' brackets, as in -f "best[height=720]"'
+            ' (or -f "[filesize>10M]"). '
+            ' This works for filesize, height, width, tbr, abr, and vbr'
+            ' and the comparisons <, <=, >, >=, =, != .'
+            ' Formats for which the value is not known are excluded unless you'
+            ' put a question mark (?) after the operator.'
+            ' You can combine format filters, so '
+            '-f "[height <=? 720][tbr>500]" '
+            'selects up to 720p videos (or videos where the height is not '
+            'known) with a bitrate of at least 500 KBit/s.'
             ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'