From 2f1b7afe328267a95cd11bbab3cf80fecc2678a0 Mon Sep 17 00:00:00 2001 From: llamasblade <69692580+llamasblade@users.noreply.github.com> Date: Wed, 13 Jul 2022 19:53:22 +0200 Subject: [PATCH] [extractor/hytale] Add extractor (#4326) Authored by: llamasblade, pukkandan --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/hytale.py | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 yt_dlp/extractor/hytale.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index daef6a83d..e0721608b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -665,6 +665,7 @@ HungamaAlbumPlaylistIE, ) from .hypem import HypemIE +from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveIE, diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py new file mode 100644 index 000000000..0f4dcc309 --- /dev/null +++ b/yt_dlp/extractor/hytale.py @@ -0,0 +1,58 @@ +import re + +from .common import InfoExtractor +from ..utils import traverse_obj + + +class HytaleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hytale\.com/news/\d+/\d+/(?P<id>[a-z0-9-]+)' + _TESTS = [{ + 'url': 'https://hytale.com/news/2021/07/summer-2021-development-update', + 'info_dict': { + 'id': 'summer-2021-development-update', + 'title': 'Summer 2021 Development Update', + }, + 'playlist_count': 4, + 'playlist': [{ + 'md5': '0854ebe347d233ee19b86ab7b2ead610', + 'info_dict': { + 'id': 'ed51a2609d21bad6e14145c37c334999', + 'ext': 'mp4', + 'title': 'Avatar Personalization', + 'thumbnail': r're:https://videodelivery\.net/\w+/thumbnails/thumbnail\.jpg', + } + }] + }, { + 'url': 'https://www.hytale.com/news/2019/11/hytale-graphics-update', + 'info_dict': { + 'id': 'hytale-graphics-update', + 'title': 'Hytale graphics update', + }, + 'playlist_count': 2, + }] + + def _real_initialize(self): + media_webpage = self._download_webpage( + 'https://hytale.com/media', None, 
note='Downloading list of media', fatal=False) or '' + + clips_json = traverse_obj( + self._search_json( + r'window\.__INITIAL_COMPONENTS_STATE__\s*=\s*\[', + media_webpage, 'clips json', None), + ('media', 'clips')) or [] + + self._titles = {clip.get('src'): clip.get('caption') for clip in clips_json} + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + entries = [ + self.url_result( + f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com', + title=self._titles.get(video_hash), url_transparent=True) + for video_hash in re.findall( + r'