# yt-dlp/yt_dlp/extractor/crowdbunker.py

import itertools

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    try_get,
    unified_strdate,
)


class CrowdBunkerIE(InfoExtractor):
    """Extractor for single CrowdBunker video pages (``crowdbunker.com/v/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
        'info_dict': {
            'id': '0z4Kms8pi8I',
            'ext': 'mp4',
            'title': '117) Pass vax et solutions',
            'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
            'view_count': int,
            'duration': 5386,
            'uploader': 'Jérémie Mercier',
            'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
            'like_count': int,
            'upload_date': '20211218',
            'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the post details JSON for the video and build its info dict.

        The video id is taken from the URL, the details are downloaded from
        the ``api.divulg.org`` post endpoint, and formats are collected from
        the DASH and HLS manifests when the response provides them.

        Raises KeyError if the response has no ``video`` entry.
        """
        video_id = self._match_id(url)
        data_json = self._download_json(
            f'https://api.divulg.org/post/{video_id}/details', video_id,
            headers={'accept': 'application/json, text/plain, */*'})
        video_json = data_json['video']
        formats, subtitles = [], {}

        # Captions listed directly in the API response
        for sub in video_json.get('captions') or []:
            sub_url = try_get(sub, lambda x: x['file']['url'])
            if not sub_url:
                continue
            # `or` (rather than a .get() default) also covers an explicit null
            # language code, which would otherwise become a None subtitle key
            subtitles.setdefault(sub.get('languageCode') or 'fr', []).append({
                'url': sub_url,
            })

        mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
        if mpd_url:
            fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
        if m3u8_url:
            # The HLS extraction must be given the HLS manifest URL; passing
            # the DASH URL here parsed the wrong manifest (or None when only
            # an HLS manifest was available)
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        # Thumbnail entries without a URL are skipped
        thumbnails = [{
            'url': image['url'],
            'height': int_or_none(image.get('height')),
            'width': int_or_none(image.get('width')),
        } for image in video_json.get('thumbnails') or [] if image.get('url')]

        return {
            'id': video_id,
            'title': video_json.get('title'),
            'description': video_json.get('description'),
            'view_count': video_json.get('viewCount'),
            'duration': video_json.get('duration'),
            'uploader': try_get(data_json, lambda x: x['channel']['name']),
            'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
            'like_count': data_json.get('likesCount'),
            # Prefer the publication date, falling back to the creation date
            'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }


class CrowdBunkerChannelIE(InfoExtractor):
    """Extractor for CrowdBunker channel pages (``crowdbunker.com/@<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/@Milan_UHRIN',
        'playlist_mincount': 14,
        'info_dict': {
            'id': 'Milan_UHRIN',
        },
    }]

    def _entries(self, playlist_id):
        """Yield a url_result for every post of the channel, page by page.

        Each response's ``last`` value is sent back as the ``after`` query
        parameter of the next request; iteration ends once a response has
        no ``last`` value.
        """
        cursor = None

        for page_num in itertools.count():
            # The first request carries no cursor
            params = {'after': cursor} if cursor else {}
            page_json = self._download_json(
                f'https://api.divulg.org/organization/{playlist_id}/posts', playlist_id,
                headers={'accept': 'application/json, text/plain, */*'},
                query=params, note=f'Downloading Page {page_num}')

            for post in page_json.get('items') or []:
                uid = post.get('uid')
                # Posts without a uid cannot be turned into a video URL
                if uid:
                    yield self.url_result(
                        f'https://crowdbunker.com/v/{uid}', ie=CrowdBunkerIE.ie_key(), video_id=uid)

            cursor = page_json.get('last')
            if not cursor:
                return

    def _real_extract(self, url):
        """Return a playlist of the channel's videos, keyed by the channel name from the URL."""
        channel_id = self._match_id(url)
        return self.playlist_result(self._entries(channel_id), playlist_id=channel_id)
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`import itertools`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`int_or_none,`
			`try_get,`
			`unified_strdate,`
			`)`


			`class CrowdBunkerIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'`

			`_TESTS = [{`
			`'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',`
			`'info_dict': {`
			`'id': '0z4Kms8pi8I',`
			`'ext': 'mp4',`
			`'title': '117) Pass vax et solutions',`
			`'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',`
			`'view_count': int,`
			`'duration': 5386,`
			`'uploader': 'Jérémie Mercier',`
			`'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',`
			`'like_count': int,`
			`'upload_date': '20211218',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg',`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`},`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`'params': {'skip_download': True},`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`}]`

			`def _real_extract(self, url):`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`video_id = self._match_id(url)`
			`data_json = self._download_json(`
			`f'https://api.divulg.org/post/{video_id}/details', video_id,`
			`headers={'accept': 'application/json, text/plain, /'})`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`video_json = data_json['video']`
			`formats, subtitles = [], {}`
			`for sub in video_json.get('captions') or []:`
			`sub_url = try_get(sub, lambda x: x['file']['url'])`
			`if not sub_url:`
			`continue`
			`subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({`
			`'url': sub_url,`
			`})`

			`mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])`
			`if mpd_url:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`formats.extend(fmts)`
			`subtitles = self._merge_subtitles(subtitles, subs)`
			`m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])`
			`if m3u8_url:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id)`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`formats.extend(fmts)`
			`subtitles = self._merge_subtitles(subtitles, subs)`

			`thumbnails = [{`
			`'url': image['url'],`
			`'height': int_or_none(image.get('height')),`
			`'width': int_or_none(image.get('width')),`
			`} for image in video_json.get('thumbnails') or [] if image.get('url')]`

			`return {`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`'id': video_id,`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`'title': video_json.get('title'),`
			`'description': video_json.get('description'),`
			`'view_count': video_json.get('viewCount'),`
			`'duration': video_json.get('duration'),`
			`'uploader': try_get(data_json, lambda x: x['channel']['name']),`
			`'uploader_id': try_get(data_json, lambda x: x['channel']['id']),`
			`'like_count': data_json.get('likesCount'),`
			`'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),`
			`'thumbnails': thumbnails,`
			`'formats': formats,`
			`'subtitles': subtitles,`
			`}`


			`class CrowdBunkerChannelIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'`

			`_TESTS = [{`
			`'url': 'https://crowdbunker.com/@Milan_UHRIN',`
			`'playlist_mincount': 14,`
			`'info_dict': {`
			`'id': 'Milan_UHRIN',`
			`},`
			`}]`

[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`def _entries(self, playlist_id):`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`last = None`

			`for page in itertools.count():`
			`channel_json = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`f'https://api.divulg.org/organization/{playlist_id}/posts', playlist_id,`
			`headers={'accept': 'application/json, text/plain, /'},`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`query={'after': last} if last else {}, note=f'Downloading Page {page}')`
			`for item in channel_json.get('items') or []:`
			`v_id = item.get('uid')`
			`if not v_id:`
			`continue`
			`yield self.url_result(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`f'https://crowdbunker.com/v/{v_id}', ie=CrowdBunkerIE.ie_key(), video_id=v_id)`
[CrowdBunker] Add extractors (#2407) Closes: #2356 Authored by: Ashish0804 2022-01-21 05:55:55 +00:00			`last = channel_json.get('last')`
			`if not last:`
			`break`

			`def _real_extract(self, url):`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 23:09:58 +00:00			`playlist_id = self._match_id(url)`
			`return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id)`