From 5fff2e576f5a36ba253e53880566db932b9b7621 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 2 Aug 2022 01:00:55 +0530 Subject: [PATCH] [extractor/camtasia] Separate into own extractor (#4307) Authored by: coletdjnz --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/camtasia.py | 71 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/generic.py | 67 ------------------------------- 3 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 yt_dlp/extractor/camtasia.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b105437c3..b62b8113c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -219,6 +219,7 @@ from .camdemy import ( CamdemyFolderIE ) from .cammodels import CamModelsIE +from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py new file mode 100644 index 000000000..70ab6c62a --- /dev/null +++ b/yt_dlp/extractor/camtasia.py @@ -0,0 +1,71 @@ +import os +import urllib.parse + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CamtasiaEmbedIE(InfoExtractor): + _VALID_URL = False + _WEBPAGE_TESTS = [ + { + 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', + 'playlist': [{ + 'md5': '0c5e352edabf715d762b0ad4e6d9ee67', + 'info_dict': { + 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', + 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', + 'ext': 'flv', + 'duration': 2235.90, + } + }, { + 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', + 'info_dict': { + 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP', + 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', + 'ext': 'flv', + 'duration': 2235.93, + } + }], + 'info_dict': { + 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', + }, + 'skip': 'webpage dead' + }, + + ] + + def _extract_from_webpage(self, url, webpage): + camtasia_cfg = self._search_regex( + r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);', + webpage, 'camtasia configuration file', default=None) + if camtasia_cfg is None: + return None + + title = self._html_search_meta('DC.title', webpage, fatal=True) + + camtasia_url = urllib.parse.urljoin(url, camtasia_cfg) + camtasia_cfg = self._download_xml( + camtasia_url, self._generic_id(url), + note='Downloading camtasia configuration', + errnote='Failed to download camtasia configuration') + fileset_node = camtasia_cfg.find('./playlist/array/fileset') + + entries = [] + for n in fileset_node.getchildren(): + url_n = n.find('./uri') + if url_n is None: + continue + + entries.append({ + 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], + 'title': f'{title} - {n.tag}', + 'url': urllib.parse.urljoin(url, url_n.text), + 'duration': float_or_none(n.find('./duration').text), + }) + + return { + '_type': 'playlist', + 'entries': entries, + 'title': title, + } diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 0dc9ae0da..3d574cd02 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -933,30 +933,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - # Camtasia studio - { - 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', - 'playlist': [{ - 'md5': '0c5e352edabf715d762b0ad4e6d9ee67', - 'info_dict': { - 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', - 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', - 'ext': 'flv', - 'duration': 2235.90, - } - }, { - 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', - 'info_dict': { - 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP', - 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', - 'ext': 'flv', - 'duration': 2235.93, - } - }], - 'info_dict': { - 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', - } - }, # Flowplayer { 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html', @@ -2680,43 +2656,6 @@ class GenericIE(InfoExtractor): 'entries': entries, } - def _extract_camtasia(self, url, video_id, webpage): - """ Returns None if no camtasia video can be found. """ - - camtasia_cfg = self._search_regex( - r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);', - webpage, 'camtasia configuration file', default=None) - if camtasia_cfg is None: - return None - - title = self._html_search_meta('DC.title', webpage, fatal=True) - - camtasia_url = urllib.parse.urljoin(url, camtasia_cfg) - camtasia_cfg = self._download_xml( - camtasia_url, video_id, - note='Downloading camtasia configuration', - errnote='Failed to download camtasia configuration') - fileset_node = camtasia_cfg.find('./playlist/array/fileset') - - entries = [] - for n in fileset_node.getchildren(): - url_n = n.find('./uri') - if url_n is None: - continue - - entries.append({ - 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], - 'title': f'{title} - {n.tag}', - 'url': urllib.parse.urljoin(url, url_n.text), - 'duration': float_or_none(n.find('./duration').text), - }) - - return { - '_type': 'playlist', - 'entries': entries, - 'title': title, - } - def _kvs_getrealurl(self, video_url, license_code): if not video_url.startswith('function/0/'): return video_url # not obfuscated @@ -2920,12 +2859,6 @@ class GenericIE(InfoExtractor): except xml.etree.ElementTree.ParseError: pass - # Is it a Camtasia project? - camtasia_res = self._extract_camtasia(url, video_id, webpage) - if camtasia_res is not None: - self.report_detected('Camtasia video') - return camtasia_res - info_dict.update({ # it's tempting to parse this further, but you would # have to take into account all the variations like