yt-dlp/youtube_dl/extractor/dcn.py

203 lines
8.0 KiB
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
import re
import base64
2015-07-18 08:51:59 +00:00
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
)
2015-08-07 18:06:03 +00:00
from ..utils import (
int_or_none,
parse_iso8601,
Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8
2015-11-21 16:18:17 +00:00
sanitized_Request,
smuggle_url,
unsmuggle_url,
urlencode_postdata,
2015-08-07 18:06:03 +00:00
)
2015-07-18 08:51:59 +00:00
2015-08-07 18:06:03 +00:00
class DCNIE(InfoExtractor):
    """Dispatch ``/show/...`` URLs to the video or season extractor.

    The ids captured by ``_VALID_URL`` decide which extractor the URL is
    handed to; this class only builds the corresponding ``url_result``.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        """Return a ``url_result`` for the video, season or show in *url*."""
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        video_id = mobj.group('video_id')
        season_id = mobj.group('season_id')

        # A positive video id wins over everything else.
        if video_id and int(video_id) > 0:
            return self.url_result(
                'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo')

        # Next preference: a positive season id. The show id travels along
        # as smuggled data so DCNSeason can read it back from the URL.
        if season_id and int(season_id) > 0:
            season_url = smuggle_url(
                'http://www.dcndigital.ae/program/season/%s' % season_id,
                {'show_id': show_id})
            return self.url_result(season_url, 'DCNSeason')

        # Otherwise fall back to the show (program) page.
        return self.url_result(
            'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason')
2015-12-27 08:56:15 +00:00
class DCNBaseIE(InfoExtractor):
    """Helpers shared by the DCN video and live-channel extractors."""

    def _extract_video_info(self, video_data, video_id, is_live):
        """Build the metadata part of an info dict from *video_data*.

        English fields (``title_en``, ``description_en``) are preferred and
        the Arabic ones are the fallback. ``title_ar`` is read with ``[]``,
        so a ``KeyError`` is raised when no English title is present and
        the Arabic one is missing as well.
        """
        name = video_data.get('title_en') or video_data['title_ar']

        image_path = video_data.get('img')
        thumbnail_url = None
        if image_path:
            thumbnail_url = 'http://admin.mangomolo.com/analytics/%s' % image_path

        length = int_or_none(video_data.get('duration'))
        summary = video_data.get('description_en') or video_data.get('description_ar')
        # 'create_time' is parsed with a space as the date/time delimiter.
        created = parse_iso8601(video_data.get('create_time'), ' ')

        if is_live:
            name = self._live_title(name)

        return {
            'id': video_id,
            'title': name,
            'description': summary,
            'thumbnail': thumbnail_url,
            'duration': length,
            'timestamp': created,
            'is_live': is_live,
        }

    def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol):
        """Collect the HLS and HDS formats advertised by the embed page.

        The stream base URL is taken from *webpage* (either the HLS
        ``file:`` entry or an ``rtsp://`` link) with its scheme replaced by
        plain ``http``; the manifest names are then appended to that base.
        """
        base_patterns = [
            r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8',
            r'<a[^>]+href="rtsp(://[^"]+)"'
        ]
        stream_base = 'http' + self._html_search_regex(
            base_patterns, webpage, 'format url')

        # TODO: Current DASH formats are broken - $Time$ pattern in
        # <SegmentTemplate> not implemented yet
        # formats.extend(self._extract_mpd_formats(
        #     format_url_base + '/manifest.mpd',
        #     video_id, mpd_id='dash', fatal=False))
        hls_formats = self._extract_m3u8_formats(
            stream_base + '/playlist.m3u8', video_id, 'mp4',
            m3u8_entry_protocol, m3u8_id='hls', fatal=False)
        collected = []
        collected.extend(hls_formats)

        hds_formats = self._extract_f4m_formats(
            stream_base + '/manifest.f4m',
            video_id, f4m_id='hds', fatal=False)
        collected.extend(hds_formats)

        self._sort_formats(collected)
        return collected
class DCNVideoIE(DCNBaseIE):
    """Extractor for single on-demand videos (``video``, ``media``, ``catchup`` URLs)."""

    IE_NAME = 'dcn:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'info_dict':
        {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video's metadata, then its formats from the embed page."""
        video_id = self._match_id(url)

        # Step 1: JSON metadata; the request carries an explicit Origin header.
        metadata_request = sanitized_Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            headers={'Origin': 'http://www.dcndigital.ae'})
        video_data = self._download_json(metadata_request, video_id)
        info = self._extract_video_info(video_data, video_id, False)

        # Step 2: the embed page, addressed with the id / user id / signature
        # returned by the metadata call, contains the stream URLs.
        embed_query = compat_urllib_parse_urlencode({
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + embed_query,
            video_id)

        info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native')
        return info
2015-08-07 18:06:03 +00:00
2015-12-27 08:56:15 +00:00
class DCNLiveIE(DCNBaseIE):
    """Extractor for live channels (``/live/<id>`` URLs)."""

    IE_NAME = 'dcn:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'

    def _real_extract(self, url):
        """Fetch the channel's metadata, then its formats from the embed page."""
        channel_id = self._match_id(url)

        def b64_text(value):
            # The embed endpoint is sent these ids base64-encoded, as text.
            return base64.b64encode(value.encode()).decode()

        details_request = sanitized_Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
            headers={'Origin': 'http://www.dcndigital.ae'})
        channel_data = self._download_json(details_request, channel_id)
        info = self._extract_video_info(channel_data, channel_id, True)

        # Note the parameter naming of this endpoint: 'id' carries the user
        # id while 'channelid' carries the channel's own id.
        embed_query = compat_urllib_parse_urlencode({
            'id': b64_text(channel_data['user_id']),
            'channelid': b64_text(channel_data['id']),
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        webpage = self._download_webpage(
            'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + embed_query,
            channel_id)

        info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8')
        return info
class DCNSeasonIE(InfoExtractor):
    """Playlist extractor for a show (``/program/<show_id>``) or one of its
    seasons (``/program/season/<season_id>``)."""

    IE_NAME = 'dcn:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict':
        {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        """Return a playlist with one ``DCNVideo`` entry per video of the season.

        For a season URL the owning show id is taken from the smuggled data
        when present, otherwise it is looked up through the ``season_info``
        endpoint. For a show URL the show's ``default_season`` is used.

        The playlist title is the matching season's ``title_en`` (falling
        back to ``title_ar``). It is ``None`` when no entry of
        ``show['seasons']`` matches the season id, so that case still yields
        the playlist instead of failing on an unassigned local.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = re.match(self._VALID_URL, url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            # Season URLs do not carry the show id; prefer the one smuggled
            # in by the caller and only hit the API when it is absent.
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                request = sanitized_Request(
                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                    headers={'Origin': 'http://www.dcndigital.ae'})
                season = self._download_json(request, season_id)
                show_id = season['id']
        data['show_id'] = show_id

        # The show endpoint takes its parameters as a form-encoded body.
        request = sanitized_Request(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            urlencode_postdata(data),
            {
                'Origin': 'http://www.dcndigital.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        show = self._download_json(request, show_id)

        if not season_id:
            season_id = show['default_season']

        # Default the title so a season list without a matching id cannot
        # leave it unbound.
        title = None
        for season in show['seasons']:
            if season['id'] == season_id:
                title = season.get('title_en') or season['title_ar']
                break

        video_ids = [compat_str(video['id']) for video in show['videos']]
        entries = [
            self.url_result(
                'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)
            for video_id in video_ids]

        return self.playlist_result(entries, season_id, title)