From 560738f34de4df6eaf82290fd503def3f366f878 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 15 Jun 2022 18:00:34 +0530
Subject: [PATCH] [extractor] Import `_ALL_CLASSES` lazily

This significantly speeds up `import yt_dlp` in the absence of `lazy_extractors`
---
Reviewer notes (below the three-dash line; not part of the commit message):
- The module-level code that builds `_LAZY_LOADER`, `_ALL_CLASSES` and
  `_PLUGIN_CLASSES` moves unchanged from yt_dlp/extractor/__init__.py into the
  new yt_dlp/extractor/extractors.py.
- yt_dlp/extractor/__init__.py now calls
  `passthrough_module(__name__, '.extractors')`, and `passthrough_module` no
  longer imports the child module up front: it is imported inside
  `PassthroughModule.__getattr__` the first time it is needed and then reused.
- `print_debug_header`, `print_extractor_information`,
  `gen_extractor_classes`, `list_extractor_classes` and `get_info_extractor`
  import what they need at function scope instead of at module level.

 devscripts/make_lazy_extractors.py |  2 +-
 yt_dlp/YoutubeDL.py                |  6 +++--
 yt_dlp/__init__.py                 |  6 ++++-
 yt_dlp/compat/compat_utils.py      |  5 +++-
 yt_dlp/extractor/__init__.py       | 37 ++++++++++--------------------
 yt_dlp/extractor/extractors.py     | 23 +++++++++++++++++++
 6 files changed, 49 insertions(+), 30 deletions(-)
 create mode 100644 yt_dlp/extractor/extractors.py

diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index cd1985c8e..39d4646d0 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -53,7 +53,7 @@ def get_all_ies():
     if os.path.exists(PLUGINS_DIRNAME):
         os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
     try:
-        from yt_dlp.extractor import _ALL_CLASSES
+        from yt_dlp.extractor.extractors import _ALL_CLASSES
     finally:
         if os.path.exists(BLOCKED_DIRNAME):
             os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 7ba6441e1..1932af3fe 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -38,8 +38,6 @@
 from .cookies import load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
-from .extractor import _LAZY_LOADER
-from .extractor import _PLUGIN_CLASSES as plugin_extractors
 from .extractor import gen_extractor_classes, get_info_extractor
 from .extractor.openload import PhantomJSwrapper
 from .minicurses import format_text
@@ -3659,6 +3657,10 @@ def print_debug_header(self):
         if not self.params.get('verbose'):
             return
 
+        # These imports can be slow. So import them only as needed
+        from .extractor.extractors import _LAZY_LOADER
+        from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
         def get_encoding(stream):
             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
             if not supports_terminal_sequences(stream):
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 4217601bf..73ef03662 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -12,7 +12,7 @@
 from .compat import compat_getpass, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader import FileDownloader
-from .extractor import GenericIE, list_extractor_classes
+from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
 from .extractor.common import InfoExtractor
 from .options import parseOpts
@@ -79,6 +79,10 @@ def get_urls(urls, batchfile, verbose):
 
 
 def print_extractor_information(opts, urls):
+    # Importing GenericIE is currently slow since it imports other extractors
+    # TODO: Move this back to module level after generalization of embed detection
+    from .extractor.generic import GenericIE
+
     out = ''
     if opts.list_extractors:
         urls = dict.fromkeys(urls, False)
diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py
index b1d58f5b9..8da4fcc0a 100644
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@@ -33,7 +33,7 @@ def _is_package(module):
 
 def passthrough_module(parent, child, *, callback=lambda _: None):
     parent_module = importlib.import_module(parent)
-    child_module = importlib.import_module(child, parent)
+    child_module = None  # Import child module only as needed
 
     class PassthroughModule(types.ModuleType):
         def __getattr__(self, attr):
@@ -41,6 +41,9 @@ def __getattr__(self, attr):
                 with contextlib.suppress(ImportError):
                     return importlib.import_module(f'.{attr}', parent)
 
+            nonlocal child_module
+            child_module = child_module or importlib.import_module(child, parent)
+
             ret = _NO_ATTRIBUTE
             with contextlib.suppress(AttributeError):
                 ret = getattr(child_module, attr)
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index db7f3874b..6bfa4bd7b 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
-import os
+from ..compat.compat_utils import passthrough_module
 
-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
-    with contextlib.suppress(ImportError):
-        from .lazy_extractors import *  # noqa: F403
-        from .lazy_extractors import _ALL_CLASSES
-        _LAZY_LOADER = True
-
-if not _LAZY_LOADER:
-    from ._extractors import *  # noqa: F403
-    _ALL_CLASSES = [  # noqa: F811
-        klass
-        for name, klass in globals().items()
-        if name.endswith('IE') and name != 'GenericIE'
-    ]
-    _ALL_CLASSES.append(GenericIE)  # noqa: F405
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
 
 
 def gen_extractor_classes():
     """ Return a list of supported extractors.
     The order does matter; the first extractor matched is the one handling the URL.
     """
+    from .extractors import _ALL_CLASSES
+
     return _ALL_CLASSES
 
 
@@ -39,10 +22,12 @@ def gen_extractors():
 
 def list_extractor_classes(age_limit=None):
     """Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+    from .generic import GenericIE
+
     yield from sorted(filter(
-        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,  # noqa: F405
+        lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
         gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
-    yield GenericIE  # noqa: F405
+    yield GenericIE
 
 
 def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
 
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
-    return globals()[ie_name + 'IE']
+    from . import extractors
+
+    return getattr(extractors, f'{ie_name}IE')
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
new file mode 100644
index 000000000..32818a024
--- /dev/null
+++ b/yt_dlp/extractor/extractors.py
@@ -0,0 +1,23 @@
+import contextlib
+import os
+
+from ..utils import load_plugins
+
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+    with contextlib.suppress(ImportError):
+        from .lazy_extractors import *  # noqa: F403
+        from .lazy_extractors import _ALL_CLASSES
+        _LAZY_LOADER = True
+
+if not _LAZY_LOADER:
+    from ._extractors import *  # noqa: F403
+    _ALL_CLASSES = [  # noqa: F811
+        klass
+        for name, klass in globals().items()
+        if name.endswith('IE') and name != 'GenericIE'
+    ]
+    _ALL_CLASSES.append(GenericIE)  # noqa: F405
+
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES