From da20951a57bddd4a0102cd776ff93a2adc6db77d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Aug 2017 22:39:05 +0700 Subject: [PATCH] [mixcloud] Extract decrypt key --- youtube_dl/extractor/mixcloud.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 52f7428e0..fcf7beeb2 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -9,6 +9,7 @@ from ..compat import ( compat_chr, compat_ord, + compat_str, compat_urllib_parse_unquote, compat_urlparse, ) @@ -53,15 +54,18 @@ class MixcloudIE(InfoExtractor): 'only_matching': True, }] + _keys = [ + 'return { requestAnimationFrame: function(callback) { callback(); }, innerHeight: 500 };', + 'pleasedontdownloadourmusictheartistswontgetpaid', + 'window.addEventListener = window.addEventListener || function() {};', + '(function() { return new Date().toLocaleDateString(); })()' + ] + _current_key = None + # See https://www.mixcloud.com/media/js2/www_js_2.9e23256562c080482435196ca3975ab5.js def _decrypt_play_info(self, play_info, video_id): - KEYS = ( - 'pleasedontdownloadourmusictheartistswontgetpaid', - 'window.addEventListener = window.addEventListener || function() {};', - '(function() { return new Date().toLocaleDateString(); })()', - ) play_info = base64.b64decode(play_info.encode('ascii')) - for num, key in enumerate(KEYS, start=1): + for num, key in enumerate(self._keys, start=1): try: return self._parse_json( ''.join([ @@ -69,7 +73,7 @@ def _decrypt_play_info(self, play_info, video_id): for idx, ch in enumerate(play_info)]), video_id) except ExtractorError: - if num == len(KEYS): + if num == len(self._keys): raise def _real_extract(self, url): @@ -80,6 +84,20 @@ def _real_extract(self, url): webpage = self._download_webpage(url, track_id) + if not self._current_key: + js_url = self._search_regex( + 
r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/js2/www_js_4\.[^>]+\.js)', + webpage, 'js url', default=None) + if js_url: + js = self._download_webpage(js_url, track_id, fatal=False) + if js: + key = self._search_regex( + r'player\s*:\s*{.*?\bvalue\s*:\s*(["\'])(?P<key>(?:(?!\1).)+)\1', + js, 'key', default=None, group='key') + if key and isinstance(key, compat_str): + self._keys.insert(0, key) + self._current_key = key + message = self._html_search_regex( r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)', webpage, 'error message', default=None)