yt-dlp/youtube_dl/extractor/sztvhu.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class SztvHuIE(InfoExtractor):
    """Extractor for video pages on sztv.hu and www.tvszombathely.hu.

    The numeric suffix at the end of the page URL is used as the video id.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',
        'md5': 'a6df607b11fb07d0e9f2ad94613375cb',
        'info_dict': {
            'id': '20130909',
            'ext': 'mp4',
            'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',
            'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',
        },
    }

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict for its video.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description`` and ``thumbnail``.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The pattern skips three arbitrary characters and a colon after the
        # opening quote of the ``file`` entry; the remainder is the media
        # path, which is appended to the fixed VOD base URL.
        media_path = self._search_regex(
            r'file: "...:(.*?)",', page, 'video file')
        info = {
            'id': video_id,
            'url': 'http://media.sztv.hu/vod/' + media_path,
        }

        # Only the part of the meta title before the last two
        # ' - '-separated segments is captured.
        info['title'] = self._html_search_regex(
            r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',
            page, 'video title')

        # The description lookup is explicitly requested as non-fatal.
        info['description'] = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"/>',
            page, 'video description', fatal=False)

        info['thumbnail'] = self._og_search_thumbnail(page)
        return info
Unify coding cookie 2016-10-02 11:39:18 +00:00			`# coding: utf-8`
[sztvhu] Modernize 2014-11-21 21:02:16 +00:00			`from __future__ import unicode_literals`
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00
			`from .common import InfoExtractor`

[sztvhu] Simplify 2013-10-14 23:33:20 +00:00
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00			`class SztvHuIE(InfoExtractor):`
Add support for https for all extractors as preventive and future-proof measure 2016-03-21 15:36:32 +00:00			`_VALID_URL = r'https?://(?:(?:www\.)?sztv\.hu|www\.tvszombathely\.hu)/(?:[^/]+)/.+-(?P<id>[0-9]+)'`
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00			`_TEST = {`
[sztvhu] Modernize 2014-11-21 21:02:16 +00:00			`'url': 'http://sztv.hu/hirek/cserkeszek-nepszerusitettek-a-kornyezettudatos-eletmodot-a-savaria-teren-20130909',`
			`'md5': 'a6df607b11fb07d0e9f2ad94613375cb',`
			`'info_dict': {`
			`'id': '20130909',`
			`'ext': 'mp4',`
			`'title': 'Cserkészek népszerűsítették a környezettudatos életmódot a Savaria téren',`
			`'description': 'A zöld nap játékos ismeretterjesztő programjait a Magyar Cserkész Szövetség szervezte, akik az ország nyolc városában adják át tudásukat az érdeklődőknek. A PET...',`
[sztv] skip test, site is undergoing mid-term maintenance 2013-11-20 08:59:03 +00:00			`},`
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00			`}`

			`def _real_extract(self, url):`
[sztvhu] Modernize 2014-11-21 21:02:16 +00:00			`video_id = self._match_id(url)`
[sztvhu] Simplify 2013-10-14 23:33:20 +00:00			`webpage = self._download_webpage(url, video_id)`
			`video_file = self._search_regex(`
			`r'file: "...:(.*?)",', webpage, 'video file')`
			`title = self._html_search_regex(`
[sztvhu] Fix the title extraction 2013-10-15 06:22:59 +00:00			`r'<meta name="title" content="([^"]*?) - [^-]*? - [^-]*?"',`
[sztvhu] Simplify 2013-10-14 23:33:20 +00:00			`webpage, 'video title')`
			`description = self._html_search_regex(`
			`r'<meta name="description" content="([^"]*)"/>',`
			`webpage, 'video description', fatal=False)`
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00			`thumbnail = self._og_search_thumbnail(webpage)`

[sztvhu] Simplify 2013-10-14 23:33:20 +00:00			`video_url = 'http://media.sztv.hu/vod/' + video_file`
Add an extractor for Szombathelyi TV 2013-10-14 11:07:47 +00:00
[sztvhu] Simplify 2013-10-14 23:33:20 +00:00			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'title': title,`
			`'description': description,`
			`'thumbnail': thumbnail,`
			`}`