# yt-dlp/yt_dlp/extractor/tv2.py

import re

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    float_or_none,
    js_to_json,
    parse_iso8601,
    remove_end,
    strip_or_none,
    try_get,
)


class TV2IE(InfoExtractor):
    """Extractor for single tv2.no videos, resolved through the sumo.tv2.no API."""

    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v(?:ideo)?\d*/(?:[^?#]+/)*(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.tv2.no/v/1791207/',
        'info_dict': {
            'id': '1791207',
            'ext': 'mp4',
            'title': 'Her kolliderer romsonden med asteroiden ',
            'description': 'En romsonde har krasjet inn i en asteroide i verdensrommet. Kollisjonen skjedde klokken 01:14 natt til tirsdag 27. september norsk tid. \n\nNasa kaller det sitt første forsøk på planetforsvar.',
            'timestamp': 1664238190,
            'upload_date': '20220927',
            'duration': 146,
            'thumbnail': r're:^https://.*$',
            'view_count': int,
            'categories': list,
        },
    }, {
        'url': 'http://www.tv2.no/v2/916509',
        'only_matching': True,
    }, {
        'url': 'https://www.tv2.no/video/nyhetene/her-kolliderer-romsonden-med-asteroiden/1791207/',
        'only_matching': True,
    }]
    # Stream types requested one by one from the play endpoint.
    _PROTOCOLS = ('HLS', 'DASH')
    _GEO_COUNTRIES = ['NO']

    def _real_extract(self, url):
        """Fetch the asset metadata and the playback streams for every protocol.

        Returns the info dict for the video. Raises ExtractorError when the
        play endpoint answers 401 (geo restriction, login required or any
        other API-reported error); other download errors are re-raised as is.
        """
        video_id = self._match_id(url)
        asset = self._download_json(
            'https://sumo.tv2.no/rest/assets/' + video_id, video_id,
            'Downloading metadata JSON')
        title = asset['title']
        is_live = asset.get('live') is True

        formats = []
        format_urls = []
        # Bound up front so the result dict below can always be built, even
        # when no protocol returns a single stream.
        video_url = None
        for protocol in self._PROTOCOLS:
            try:
                data = self._download_json(
                    'https://api.sumo.tv2.no/play/%s?stream=%s' % (video_id, protocol),
                    video_id, 'Downloading playback JSON',
                    headers={'content-type': 'application/json'},
                    data='{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}'.encode())['playback']
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    # The 401 body carries a JSON error object with a code
                    # and a human readable description.
                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
                    error_code = error.get('code')
                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
                        self.raise_login_required()
                    raise ExtractorError(error['description'])
                raise
            # `or []` also covers an explicit null for "streams".
            for item in data.get('streams') or []:
                if not isinstance(item, dict):
                    continue
                video_url = item.get('url')
                if not video_url or video_url in format_urls:
                    continue
                format_id = '%s-%s' % (protocol.lower(), item.get('type'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                format_urls.append(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'm3u8':
                    # HLS manifests of DRM protected assets are skipped.
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        video_url, video_id, format_id, fatal=False))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # Smooth Streaming manifests are deliberately ignored.
                    pass
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                    })
        # `data` is the playback object of the last protocol requested.
        if not formats and data.get('drmProtected'):
            self.report_drm(video_id)

        thumbnails = [{
            'id': thumb_id,
            'url': thumb_url,
        } for thumb_id, thumb_url in (asset.get('images') or {}).items()]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')),
            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
            'view_count': int_or_none(asset.get('views')),
            # `or ''` keeps this a list even when "tags" is null.
            'categories': (asset.get('tags') or '').split(','),
            'formats': formats,
            'is_live': is_live,
        }


class TV2ArticleIE(InfoExtractor):
    """Playlist extractor for tv2.no article pages that embed videos."""

    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?!v(?:ideo)?\d*/)[^?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.tv2.no/underholdning/forraeder/katarina-flatland-angrer-etter-forraeder-exit/15095188/',
        'info_dict': {
            'id': '15095188',
            'title': 'Katarina Flatland angrer etter Forræder-exit',
            'description': 'SANDEFJORD (TV 2): Katarina Flatland (33) måtte følge i sine fars fotspor, da hun ble forvist fra Forræder.',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.tv2.no/a/6930542',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the asset ids embedded in the article and return them as a playlist."""
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        # First try the old `data-assetid` attribute embeds.
        asset_ids = re.findall(r'data-assetid=["\'](\d+)', page)

        if not asset_ids:
            # Otherwise look for JS player constructor calls and read the
            # asset id out of their object-literal argument.
            embed_args = re.findall(
                r'(?s)(?:TV2ContentboxVideo|TV2\.TV2Video)\(({.+?})\)', page)
            for raw_args in embed_args:
                parsed = self._parse_json(
                    raw_args, article_id, transform_source=js_to_json, fatal=False)
                embedded_id = parsed.get('assetId') if parsed else None
                if embedded_id:
                    asset_ids.append(embedded_id)

        # Every asset is delegated to the single-video extractor.
        entries = []
        for asset_id in asset_ids:
            entries.append(
                self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2'))

        site_suffix = ' - TV2.no'
        title = remove_end(self._og_search_title(page), site_suffix)
        description = remove_end(self._og_search_description(page), site_suffix)

        return self.playlist_result(entries, article_id, title, description)


class KatsomoIE(InfoExtractor):
    """Extractor for katsomo.fi / mtv.fi / mtvuutiset.fi videos via the katsomo API."""

    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
        'info_dict': {
            'id': '1181321',
            'ext': 'mp4',
            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
            'description': 'Päätöksen teki Pelicansin hallitus.',
            'timestamp': 1575116484,
            'upload_date': '20191130',
            'duration': 37.12,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
        'only_matching': True,
    }, {
        'url': 'https://www.mtvuutiset.fi/video/prog1311159',
        'only_matching': True,
    }, {
        'url': 'https://www.katsomo.fi/#!/jakso/1311159',
        'only_matching': True,
    }]
    _API_DOMAIN = 'api.katsomo.fi'
    # Protocols requested one by one from the play endpoint.
    _PROTOCOLS = ('HLS', 'MPD')
    _GEO_COUNTRIES = ['FI']

    def _real_extract(self, url):
        """Fetch the asset metadata and the playback items for every protocol.

        Returns the info dict for the video. Raises ExtractorError when the
        play endpoint answers 401 (geo restriction, login required or any
        other API-reported error); other download errors are re-raised as is.
        """
        video_id = self._match_id(url)
        api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)

        asset = self._download_json(
            api_base + '.json', video_id,
            'Downloading metadata JSON')['asset']
        title = asset.get('subtitle') or asset['title']
        is_live = asset.get('live') is True

        formats = []
        format_urls = []
        # Bound up front so the result dict below can always be built, even
        # when every protocol comes back without playback items.
        video_url = None
        for protocol in self._PROTOCOLS:
            try:
                data = self._download_json(
                    api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
                    video_id, 'Downloading play JSON')['playback']
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    # The 401 body carries a JSON error object with a code
                    # and a human readable description.
                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
                    error_code = error.get('code')
                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
                        self.raise_login_required()
                    raise ExtractorError(error['description'])
                raise
            items = try_get(data, lambda x: x['items']['item'])
            if not items:
                continue
            # A lone item is not wrapped in a list; normalise it.
            if not isinstance(items, list):
                items = [items]
            for item in items:
                if not isinstance(item, dict):
                    continue
                video_url = item.get('url')
                if not video_url or video_url in format_urls:
                    continue
                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                format_urls.append(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'm3u8':
                    # HLS manifests of DRM protected assets are skipped.
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        video_url, video_id, format_id, fatal=False))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # Smooth Streaming manifests are deliberately ignored.
                    pass
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                        'tbr': int_or_none(item.get('bitrate')),
                        'filesize': int_or_none(item.get('fileSize')),
                    })
        # `data` is the playback object of the last protocol requested.
        if not formats and data.get('drmProtected'):
            self.report_drm(video_id)

        # Only the values are needed; entries that are not dicts or carry no
        # URL cannot form a usable thumbnail and are dropped.
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail['url'],
        } for thumbnail in (asset.get('imageVersions') or {}).values()
            if isinstance(thumbnail, dict) and thumbnail.get('url')]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('createTime')),
            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
            'view_count': int_or_none(asset.get('views')),
            # `or ''` keeps this a list even when "keywords" is null.
            'categories': (asset.get('keywords') or '').split(','),
            'formats': formats,
            'is_live': is_live,
        }


class MTVUutisetArticleIE(InfoExtractor):
    """Playlist extractor for mtvuutiset.fi articles with Katsomo/YouTube videos."""

    # `www.` is optional, matching the other extractors in this module.
    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
        'info_dict': {
            'id': '1311159',
            'ext': 'mp4',
            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'timestamp': 1600608966,
            'upload_date': '20200920',
            'duration': 153.7886666,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # multiple Youtube embeds
        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the article JSON and return its supported videos as a playlist."""
        article_id = self._match_id(url)
        article = self._download_json(
            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
            article_id)

        def entries():
            # Lazily yield one url_result per video whose type is supported.
            for video in (article.get('videos') or []):
                if not isinstance(video, dict):
                    continue
                video_type = video.get('videotype')
                video_url = video.get('url')
                if not (video_url and video_type in ('katsomo', 'youtube')):
                    continue
                # The capitalized type ('Katsomo' / 'Youtube') is passed as
                # the extractor key for the entry.
                yield self.url_result(
                    video_url, video_type.capitalize(), video.get('video_id'))

        return self.playlist_result(
            entries(), article_id, article.get('title'), article.get('description'))
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								import re
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								from .common import InfoExtractor
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								from ..networking.exceptions import HTTPError
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								from ..utils import (
 								    determine_ext,
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								    ExtractorError,
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								    int_or_none,
 								    float_or_none,
-												[tv2:article] Fix extraction (Closes #10188)

											
										
										
											2016-07-29 14:43:17 +00:00
+								    js_to_json,
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								    parse_iso8601,
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								    remove_end,
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								    strip_or_none,
-												[tv2] Fix and improve extraction (closes #22787)

											
										
										
											2019-10-29 19:21:52 +00:00
+								    try_get,
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								)
 								class TV2IE(InfoExtractor):
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v(?:ideo)?\d*/(?:[^?#]+/)*(?P<id>\d+)'
-												Update to ytdl-2021.02.04.1 except youtube

											
										
										
											2021-02-04 07:56:01 +00:00
+								    _TESTS = [{
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								        'url': 'http://www.tv2.no/v/1791207/',
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								        'info_dict': {
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								            'id': '1791207',
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								            'ext': 'mp4',
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								            'title': 'Her kolliderer romsonden med asteroiden ',
 								            'description': 'En romsonde har krasjet inn i en asteroide i verdensrommet. Kollisjonen skjedde klokken 01:14 natt til tirsdag 27. september norsk tid. \n\nNasa kaller det sitt første forsøk på planetforsvar.',
 								            'timestamp': 1664238190,
 								            'upload_date': '20220927',
 								            'duration': 146,
 								            'thumbnail': r're:^https://.*$',
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								            'view_count': int,
 								            'categories': list,
-												[tv2] Fix test_TV2

											
										
										
											2016-01-30 19:42:34 +00:00
+								        },
-												[tv2] Expand valid URL
Closes #1764

											
										
										
											2021-11-23 11:45:41 +00:00
+								    }, {
 								        'url': 'http://www.tv2.no/v2/916509',
 								        'only_matching': True,
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								    }, {
 								        'url': 'https://www.tv2.no/video/nyhetene/her-kolliderer-romsonden-med-asteroiden/1791207/',
 								        'only_matching': True,
-												Update to ytdl-2021.02.04.1 except youtube

											
										
										
											2021-02-04 07:56:01 +00:00
+								    }]
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								    _PROTOCOLS = ('HLS', 'DASH')
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								    _GEO_COUNTRIES = ['NO']
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
 								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								        asset = self._download_json('https://sumo.tv2.no/rest/assets/' + video_id, video_id,
 								                                    'Downloading metadata JSON')
 								        title = asset['title']
-												Update to ytdl-2021.02.04.1 except youtube

											
										
										
											2021-02-04 07:56:01 +00:00
+								        is_live = asset.get('live') is True
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								        formats = []
 								        format_urls = []
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								        for protocol in self._PROTOCOLS:
 								            try:
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								                data = self._download_json('https://api.sumo.tv2.no/play/%s?stream=%s' % (video_id, protocol),
 								                                           video_id, 'Downloading playabck JSON',
 								                                           headers={'content-type': 'application/json'},
 								                                           data='{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}'.encode())['playback']
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								            except ExtractorError as e:
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
 								                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								                    error_code = error.get('code')
 								                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
 								                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 								                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
 								                        self.raise_login_required()
 								                    raise ExtractorError(error['description'])
 								                raise
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								            items = data.get('streams', [])
-												[tv2] Fix and improve extraction (closes #22787)

											
										
										
											2019-10-29 19:21:52 +00:00
+								            for item in items:
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								                video_url = item.get('url')
 								                if not video_url or video_url in format_urls:
 								                    continue
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								                format_id = '%s-%s' % (protocol.lower(), item.get('type'))
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								                if not self._is_valid_url(video_url, video_id, format_id):
 								                    continue
 								                format_urls.append(video_url)
 								                ext = determine_ext(video_url)
 								                if ext == 'f4m':
 								                    formats.extend(self._extract_f4m_formats(
-												[tv2] Improve extraction

											
										
										
											2016-07-29 15:01:34 +00:00
+								                        video_url, video_id, f4m_id=format_id, fatal=False))
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								                elif ext == 'm3u8':
-												[tv2] detect DRM protection

											
										
										
											2019-11-30 14:50:17 +00:00
+								                    if not data.get('drmProtected'):
 								                        formats.extend(self._extract_m3u8_formats(
-												[cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos

`live` parameter already handles changing the protocol

											
										
										
											2022-03-02 17:29:01 +00:00
+								                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								                elif ext == 'mpd':
 								                    formats.extend(self._extract_mpd_formats(
 								                        video_url, video_id, format_id, fatal=False))
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
 								                    pass
 								                else:
 								                    formats.append({
 								                        'url': video_url,
 								                        'format_id': format_id,
 								                    })
-												[tv2] detect DRM protection

											
										
										
											2019-11-30 14:50:17 +00:00
+								        if not formats and data.get('drmProtected'):
-												[extractor] Better error message for DRM (#729)

Closes #636
											
										
										
											2021-08-22 20:08:38 +00:00
+								            self.report_drm(video_id)
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
 								        thumbnails = [{
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								            'id': type,
 								            'url': thumb_url,
 								        } for type, thumb_url in (asset.get('images') or {}).items()]
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
 								        return {
 								            'id': video_id,
 								            'url': video_url,
-												[extractor] Standardize `_live_title`

											
										
										
											2021-12-15 16:00:46 +00:00
+								            'title': title,
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								            'description': strip_or_none(asset.get('description')),
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								            'thumbnails': thumbnails,
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								            'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')),
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
+								            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
 								            'view_count': int_or_none(asset.get('views')),
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
+								            'categories': asset.get('tags', '').split(','),
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								            'formats': formats,
-												Update to ytdl-2021.02.04.1 except youtube

											
										
										
											2021-02-04 07:56:01 +00:00
+								            'is_live': is_live,
-												[tv2] Add extractor (#5724)

											
										
										
											2015-05-16 21:01:52 +00:00
+								        }
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
 								class TV2ArticleIE(InfoExtractor):
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?!v(?:ideo)?\d*/)[^?#]+/(?P<id>\d+)'
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								    _TESTS = [{
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								        'url': 'https://www.tv2.no/underholdning/forraeder/katarina-flatland-angrer-etter-forraeder-exit/15095188/',
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								        'info_dict': {
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								            'id': '15095188',
 								            'title': 'Katarina Flatland angrer etter Forræder-exit',
 								            'description': 'SANDEFJORD (TV 2): Katarina Flatland (33) måtte følge i sine fars fotspor, da hun ble forvist fra Forræder.',
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								        },
 								        'playlist_count': 2,
 								    }, {
 								        'url': 'http://www.tv2.no/a/6930542',
 								        'only_matching': True,
 								    }]
 								    def _real_extract(self, url):
 								        playlist_id = self._match_id(url)
 								        webpage = self._download_webpage(url, playlist_id)
-												[tv2:article] Fix extraction (Closes #10188)

											
										
										
											2016-07-29 14:43:17 +00:00
+								        # Old embed pattern (looks unused nowadays)
 								        assets = re.findall(r'data-assetid=["\'](\d+)', webpage)
 								        if not assets:
 								            # New embed pattern
-												[extractor/tv2] Support new url format (#5063)

Closes #4973
Authored by: tobi1805
											
										
										
											2022-09-30 16:57:15 +00:00
+								            for v in re.findall(r'(?s)(?:TV2ContentboxVideo|TV2\.TV2Video)\(({.+?})\)', webpage):
-												[tv2:article] Fix extraction (Closes #10188)

											
										
										
											2016-07-29 14:43:17 +00:00
+								                video = self._parse_json(
 								                    v, playlist_id, transform_source=js_to_json, fatal=False)
 								                if not video:
 								                    continue
 								                asset = video.get('assetId')
 								                if asset:
 								                    assets.append(asset)
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
+								        entries = [
-												[tv2:article] Fix extraction (Closes #10188)

											
										
										
											2016-07-29 14:43:17 +00:00
+								            self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
 								            for asset_id in assets]
-												[tv2:article] Add extractor (Closes #5724)

											
										
										
											2015-05-16 21:32:53 +00:00
 								        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
 								        description = remove_end(self._og_search_description(webpage), ' - TV2.no')
 								        return self.playlist_result(entries, playlist_id, title, description)
-												[tv2] add support for mtv.fi and fix tv2.no article extraction(closes #10543)

											
										
										
											2019-11-30 14:26:12 +00:00
-												[TV2] Fix extractor (#766)

Closes #764 
Authored by: Ashish0804
											
										
										
											2021-08-23 16:02:33 +00:00
class KatsomoIE(InfoExtractor):
    """Extractor for single videos on katsomo.fi, mtv.fi and mtvuutiset.fi.

    Asset metadata and playback manifests are both fetched from the JSON
    web API hosted on ``_API_DOMAIN``.
    """
    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
        'info_dict': {
            'id': '1181321',
            'ext': 'mp4',
            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
            'description': 'Päätöksen teki Pelicansin hallitus.',
            'timestamp': 1575116484,
            'upload_date': '20191130',
            'duration': 37.12,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
        'only_matching': True,
    }, {
        'url': 'https://www.mtvuutiset.fi/video/prog1311159',
        'only_matching': True,
    }, {
        'url': 'https://www.katsomo.fi/#!/jakso/1311159',
        'only_matching': True,
    }]
    _API_DOMAIN = 'api.katsomo.fi'
    # Values sent as the ``protocol`` query parameter of the play.json request.
    _PROTOCOLS = ('HLS', 'MPD')
    _GEO_COUNTRIES = ['FI']

    def _real_extract(self, url):
        """Return the info dict for the asset whose numeric id is in *url*.

        Downloads the asset metadata, then requests ``play.json`` once per
        entry in ``_PROTOCOLS`` and collects every playable format found.

        Raises ExtractorError when the play API answers 401: geo restriction
        and login-required codes are mapped to the dedicated helpers, any
        other code is raised with the API's own description.
        """
        video_id = self._match_id(url)
        api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
        asset = self._download_json(
            api_base + '.json', video_id,
            'Downloading metadata JSON')['asset']
        title = asset.get('subtitle') or asset['title']
        is_live = asset.get('live') is True

        formats = []
        format_urls = []
        # Remember DRM across *all* protocols; checking only the last response
        # after the loop misses DRM flagged by an earlier one and crashes when
        # the last ``playback`` value is null.
        drm_protected = False
        for protocol in self._PROTOCOLS:
            try:
                data = self._download_json(
                    api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
                    video_id, 'Downloading play JSON')['playback']
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
                    error_code = error.get('code')
                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
                        self.raise_login_required()
                    raise ExtractorError(error['description'])
                raise

            is_drm = bool(try_get(data, lambda x: x['drmProtected']))
            drm_protected = drm_protected or is_drm

            items = try_get(data, lambda x: x['items']['item'])
            if not items:
                continue
            # A single item is not wrapped in a list by the API response.
            if not isinstance(items, list):
                items = [items]
            for item in items:
                if not isinstance(item, dict):
                    continue
                video_url = item.get('url')
                if not video_url or video_url in format_urls:
                    continue
                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                format_urls.append(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'm3u8':
                    # DRM-protected HLS playlists are not expanded into formats.
                    if not is_drm:
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        video_url, video_id, format_id, fatal=False))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # Smooth Streaming manifests are deliberately skipped.
                    pass
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                        'tbr': int_or_none(item.get('bitrate')),
                        'filesize': int_or_none(item.get('fileSize')),
                    })

        if not formats and drm_protected:
            self.report_drm(video_id)

        # Skip malformed entries and entries without a URL.
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail['url'],
        } for thumbnail in (asset.get('imageVersions') or {}).values()
            if isinstance(thumbnail, dict) and thumbnail.get('url')]

        # No top-level 'url' here: the loop variable ``video_url`` is unbound
        # when no playback item was returned (UnboundLocalError), and the
        # media URLs are already carried by ``formats``.
        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('createTime')),
            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
            'view_count': int_or_none(asset.get('views')),
            # ``or ''`` also covers an explicit null, which ``.get(key, '')`` does not.
            'categories': (asset.get('keywords') or '').split(','),
            'formats': formats,
            'is_live': is_live,
        }
-												Update to ytdl-2021.02.04.1 except youtube

											
										
										
											2021-02-04 07:56:01 +00:00
 								class MTVUutisetArticleIE(InfoExtractor):
 								    _VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
 								    _TESTS = [{
 								        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
 								        'info_dict': {
 								            'id': '1311159',
 								            'ext': 'mp4',
 								            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
 								            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
 								            'timestamp': 1600608966,
 								            'upload_date': '20200920',
 								            'duration': 153.7886666,
 								            'view_count': int,
 								            'categories': list,
 								        },
 								        'params': {
 								            # m3u8 download
 								            'skip_download': True,
 								        },
 								    }, {
 								        # multiple Youtube embeds
 								        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
 								        'only_matching': True,
 								    }]
 								    def _real_extract(self, url):
 								        article_id = self._match_id(url)
 								        article = self._download_json(
 								            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
 								            article_id)
 								        def entries():
 								            for video in (article.get('videos') or []):
 								                video_type = video.get('videotype')
 								                video_url = video.get('url')
 								                if not (video_url and video_type in ('katsomo', 'youtube')):
 								                    continue
 								                yield self.url_result(
 								                    video_url, video_type.capitalize(), video.get('video_id'))
 								        return self.playlist_result(
 								            entries(), article_id, article.get('title'), article.get('description'))