yt-dlp/yt_dlp/extractor/rbgtum.py

import re

from .common import InfoExtractor
from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj


class RbgTumIE(InfoExtractor):
    # Single-lecture pages: URLs on live.rbg.tum.de or tum.live whose path
    # starts with "/w/". Everything after "/w/" up to a "?" or "#" is taken
    # as the video id (so it may contain slashes, e.g. "cpp/22128" or
    # "I2DL/12349/PRES").
    _VALID_URL = r'https?://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        },
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        },
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        },
    }, {
        'url': 'https://tum.live/w/linalginfo/27102',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape a lecture page and return its info dict.

        The page is searched for a quoted https URL containing ".m3u8",
        for the text of the first <h1> element (requested with
        ``fatal=False``), and for the HTML page title, from which a leading
        'TUM-Live | ' is stripped to obtain the series name.
        """
        lecture_id = self._match_id(url)
        page = self._download_webpage(url, lecture_id)

        playlist_url = self._html_search_regex(
            r'"(https://[^"]+\.m3u8[^"]*)', page, 'm3u8')
        heading = self._html_search_regex(
            r'<h1[^>]*>([^<]+)</h1>', page, 'title', fatal=False)
        page_title = self._html_extract_title(page)

        # Dict values are evaluated top to bottom, so the playlist is only
        # fetched after all page-level metadata has been gathered.
        return {
            'id': lecture_id,
            'title': heading,
            'series': remove_start(page_title, 'TUM-Live | '),
            'formats': self._extract_m3u8_formats(
                playlist_url, lecture_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls'),
        }


class RbgTumCourseIE(InfoExtractor):
    # Course overview pages of the form
    #   /old/course/<year>/<term>/<slug>
    # on live.rbg.tum.de or tum.live. The "<year>/<term>/<slug>" part is the
    # playlist id; hostname, year, term and slug are captured individually.
    _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tum.live/old/course/2023/S/linalginfo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist of lecture URLs for one course.

        The course JSON endpoint is tried first (non-fatally); its 'Name'
        becomes the playlist title and every 'ID' under 'Streams' becomes
        one entry. If that yields no entries, the course web page itself is
        downloaded and its "/w/<a>/<b>" links are used instead.
        """
        mobj = self._match_valid_url(url)
        course_id, hostname, year, term, slug = mobj.group(
            'id', 'hostname', 'year', 'term', 'slug')

        course = self._download_json(
            f'https://{hostname}/api/courses/{slug}/', course_id,
            query={'year': year, 'term': term}, fatal=False) or {}
        series_title = course.get('Name')
        stream_ids = traverse_obj(course, ('Streams', ..., 'ID'))
        entries = [
            self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
            for stream_id in stream_ids]

        if entries:
            return self.playlist_result(entries, course_id, series_title)

        # The API gave us nothing usable: fall back to scraping the page,
        # taking the title from <title> and the lectures from its links.
        page = self._download_webpage(url, course_id)
        series_title = remove_start(self._html_extract_title(page), 'TUM-Live | ')
        lecture_paths = re.findall(r'href="(/w/[^/"]+/[^/"]+)"', page)
        return self.playlist_result(
            [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
             for lecture_path in lecture_paths],
            course_id, series_title)


class RbgTumNewCourseIE(InfoExtractor):
    # Query-string style course URLs on the site root, e.g.
    #   https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3
    # The pattern only anchors on "<hostname>/?"; the individual parameters
    # are validated in _real_extract.
    _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate a query-string course URL into an /old/course/ URL.

        Raises ExtractorError naming every one of 'year', 'term' and 'slug'
        that is absent (or empty) in the query string. Otherwise the first
        value of each parameter is used to build the
        "/old/course/<year>/<term>/<slug>" URL on the same hostname, which
        is handed over to RbgTumCourseIE.
        """
        required = ('year', 'term', 'slug')
        params = parse_qs(url)

        missing = [name for name in required if not params.get(name)]
        if missing:
            raise ExtractorError(f'Input URL is missing query parameters: {", ".join(missing)}')

        # Each parameter maps to a list of values; only the first is used.
        year, term, slug = (params[name][0] for name in required)
        hostname = self._match_valid_url(url).group('hostname')

        return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`import re`

			`from .common import InfoExtractor`
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> 2024-05-26 19:27:21 +00:00			`from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00

			`class RbgTumIE(InfoExtractor):`
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968) Except for Vimeo, since that causes matching collisions. Authored by: seproDev 2024-02-01 18:38:42 +00:00			`_VALID_URL = r'https?://(?:live\.rbg\.tum\.de\|tum\.live)/w/(?P<id>[^?#]+)'`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`_TESTS = [{`
			`# Combined view`
			`'url': 'https://live.rbg.tum.de/w/cpp/22128',`
			`'md5': '53a5e7b3e07128e33bbf36687fe1c08f',`
			`'info_dict': {`
			`'id': 'cpp/22128',`
			`'ext': 'mp4',`
			`'title': 'Lecture: October 18. 2022',`
			`'series': 'Concepts of C++ programming (IN2377)',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`},`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`}, {`
			`# Presentation only`
			`'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',`
			`'md5': '36c584272179f3e56b0db5d880639cba',`
			`'info_dict': {`
			`'id': 'I2DL/12349/PRES',`
			`'ext': 'mp4',`
			`'title': 'Lecture 3: Introduction to Neural Networks',`
			`'series': 'Introduction to Deep Learning (IN2346)',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`},`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`}, {`
			`# Camera only`
			`'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',`
			`'md5': 'e04189d92ff2f56aedf5cede65d37aad',`
			`'info_dict': {`
			`'id': 'fvv-info/16130/CAM',`
			`'ext': 'mp4',`
			`'title': 'Fachschaftsvollversammlung',`
			`'series': 'Fachschaftsvollversammlung Informatik',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`},`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`}, {`
			`'url': 'https://tum.live/w/linalginfo/27102',`
			`'only_matching': True,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`}]`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00
			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`

[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')`
			`lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)`
			`lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live \| ')`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00
			`formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')`

			`return {`
			`'id': video_id,`
			`'title': lecture_title,`
			`'series': lecture_series_title,`
			`'formats': formats,`
			`}`


			`class RbgTumCourseIE(InfoExtractor):`
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968) Except for Vimeo, since that causes matching collisions. Authored by: seproDev 2024-02-01 18:38:42 +00:00			`_VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de\|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`_TESTS = [{`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`'info_dict': {`
			`'title': 'Funktionale Programmierung und Verifikation (IN0003)',`
			`'id': '2022/S/fpv',`
			`},`
			`'params': {`
			`'noplaylist': False,`
			`},`
			`'playlist_count': 13,`
			`}, {`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`'url': 'https://live.rbg.tum.de/old/course/2022/W/set',`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00			`'info_dict': {`
			`'title': 'SET FSMPIC',`
			`'id': '2022/W/set',`
			`},`
			`'params': {`
			`'noplaylist': False,`
			`},`
			`'playlist_count': 6,`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`}, {`
			`'url': 'https://tum.live/old/course/2023/S/linalginfo',`
			`'only_matching': True,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`}]`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00
			`def _real_extract(self, url):`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')`
			`meta = self._download_json(`
			`f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,`
			`query={'year': year, 'term': term}) or {}`
			`lecture_series_title = meta.get('Name')`
			`lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)`
			`for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]`

			`if not lectures:`
			`webpage = self._download_webpage(url, course_id)`
			`lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live \| ')`
			`lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)`
			`for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]`

			`return self.playlist_result(lectures, course_id, lecture_series_title)`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00

[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`class RbgTumNewCourseIE(InfoExtractor):`
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968) Except for Vimeo, since that causes matching collisions. Authored by: seproDev 2024-02-01 18:38:42 +00:00			`_VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de\|tum\.live))/\?'`
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`_TESTS = [{`
			`'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',`
			`'info_dict': {`
			`'title': 'Funktionale Programmierung und Verifikation (IN0003)',`
			`'id': '2022/S/fpv',`
			`},`
			`'params': {`
			`'noplaylist': False,`
			`},`
			`'playlist_count': 13,`
			`}, {`
			`'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',`
			`'info_dict': {`
			`'title': 'SET FSMPIC',`
			`'id': '2022/W/set',`
			`},`
			`'params': {`
			`'noplaylist': False,`
			`},`
			`'playlist_count': 6,`
			`}, {`
			`'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',`
			`'only_matching': True,`
			`}]`

			`def _real_extract(self, url):`
			`query = parse_qs(url)`
			`errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]`
			`if errors:`
			`raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}')`
			`year, term, slug = query['year'][0], query['term'][0], query['slug'][0]`
			`hostname = self._match_valid_url(url).group('hostname')`
Update to ytdl-commit-2dd6c6e [YouTube] Avoid crash if uploader_id extraction fails https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772 Except: * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p 2023-02-17 11:21:34 +00:00
[ie/rbgtum] Fix extraction and support new URL format (#7690) Authored by: simon300000 2023-09-21 17:37:58 +00:00			`return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)`