0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-30 03:33:02 +00:00
yt-dlp/yt_dlp/extractor/voot.py
Anant Murmu a4713ba96d
[extractor/voot] Improve _VALID_URL (#5283)
Authored by: freezboltz
2022-10-19 12:25:28 +05:30

150 lines
5.9 KiB
Python

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
try_get,
unified_timestamp,
)
class VootIE(InfoExtractor):
    """Extract a single Voot video (movie, show episode or kids episode).

    The numeric media id taken from the URL is resolved through the
    ``getMediaInfo`` endpoint; the returned ``EntryId`` is then used to
    build HLS formats from the Kaltura play manifest.
    """

    # Verbose-mode (?x) pattern: whitespace/newlines inside it are ignored,
    # so the indentation below is purely for readability.
    _VALID_URL = r'''(?x)
                    (?:
                        voot:|
                        https?://(?:www\.)?voot\.com/?
                        (?:
                            movies?/[^/]+/|
                            (?:shows|kids)/(?:[^/]+/){4}
                        )
                    )
                    (?P<id>\d{3,})
                    '''
    _GEO_COUNTRIES = ['IN']
    _TESTS = [{
        'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
        'info_dict': {
            'id': '0_8ledb18o',
            'ext': 'mp4',
            'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
            'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
            'timestamp': 1472162937,
            'upload_date': '20160825',
            'series': 'Ishq Ka Rang Safed',
            'season_number': 1,
            'episode': 'Is this the end of Kamini?',
            'episode_number': 340,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
        'only_matching': True,
    }, {
        'url': 'https://www.voot.com/movies/pandavas-5/424627',
        'only_matching': True,
    }, {
        'url': 'https://www.voot.com/movie/fight-club/621842',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        Raises:
            ExtractorError: if the API reports a non-zero (or missing)
                status code; the API-supplied message is used when present.
        """
        video_id = self._match_id(url)
        media_info = self._download_json(
            'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
            query={
                'platform': 'Web',
                'pId': 2,
                'mediaId': video_id,
            })

        status_code = try_get(media_info, lambda x: x['status']['code'], int)
        if status_code != 0:
            # status_code is None when 'status' is absent or malformed, so the
            # message must be looked up defensively too; indexing it directly
            # would surface a KeyError/TypeError instead of an ExtractorError.
            message = try_get(
                media_info, lambda x: x['status']['message'], compat_str)
            raise ExtractorError(
                message or 'Unable to retrieve media info', expected=True)

        media = media_info['assets']
        entry_id = media['EntryId']
        title = media['MediaName']
        formats = self._extract_m3u8_formats(
            'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
            video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        # Optional metadata arrives as a list of {'Key': ..., 'Value': ...}
        # pairs; entries with an empty key or value are ignored.
        description, series, season_number, episode, episode_number = [None] * 5
        for meta in try_get(media, lambda x: x['Metas'], list) or []:
            key, value = meta.get('Key'), meta.get('Value')
            if not key or not value:
                continue
            if key == 'ContentSynopsis':
                description = value
            elif key == 'RefSeriesTitle':
                series = value
            elif key == 'RefSeriesSeason':
                season_number = int_or_none(value)
            elif key == 'EpisodeMainTitle':
                episode = value
            elif key == 'EpisodeNo':
                episode_number = int_or_none(value)

        return {
            # NOTE(review): the result is keyed by the Kaltura entry id and
            # reported under the Kaltura extractor key - presumably so archive
            # ids line up with the Kaltura extractor; confirm before changing.
            'extractor_key': 'Kaltura',
            'id': entry_id,
            'title': title,
            'description': description,
            'series': series,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'timestamp': unified_timestamp(media.get('CreationDate')),
            'duration': int_or_none(media.get('Duration')),
            'view_count': int_or_none(media.get('ViewCounter')),
            'like_count': int_or_none(media.get('like_counter')),
            'formats': formats,
        }
class VootSeriesIE(InfoExtractor):
    """Extract every episode of a Voot show as a playlist.

    Seasons are listed through ``_SHOW_API``; each season's episodes are
    then fetched page by page through ``_SEASON_API`` and delegated to
    ``VootIE`` via ``voot:<id>`` URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',
        'playlist_mincount': 442,
        'info_dict': {
            'id': '100002',
        },
    }, {
        'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003',
        'playlist_mincount': 341,
        'info_dict': {
            'id': '100003',
        },
    }]
    _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common'
    _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}'

    def _entries(self, show_id):
        """Yield a ``url_result`` for each episode of every season of *show_id*.

        Pagination of a season stops at the first page whose ``result`` is
        missing or empty.
        """
        show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id)
        # `or []` also covers an explicit `"result": null` in the response.
        for season in show_json.get('result') or []:
            season_id = try_get(season, lambda x: x['id'], compat_str)
            if not season_id:
                # Without an id the season endpoint cannot be queried.
                continue

            page_num = 1
            while True:
                page_json = self._download_json(
                    self._SEASON_API.format(season_id, page_num),
                    video_id=season_id,
                    note='Downloading JSON metadata page %d' % page_num)
                # Every page is read the same tolerant way, so a final page
                # lacking 'result' ends the loop instead of raising KeyError.
                episodes = page_json.get('result')
                if not episodes:
                    break

                for episode in episodes:
                    video_id = episode.get('id')
                    if not video_id:
                        # 'voot:None' would not match VootIE._VALID_URL.
                        continue
                    yield self.url_result(
                        'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id)
                page_num += 1

    def _real_extract(self, url):
        """Return a playlist result holding all episodes of the show in *url*."""
        show_id = self._match_id(url)
        return self.playlist_result(self._entries(show_id), playlist_id=show_id)