[mixcloud] Shed API dependency (#2904)

This commit is contained in:
Philipp Hagemeister 2014-05-13 09:42:38 +02:00
parent d0a122348e
commit 57c7411f46
2 changed files with 49 additions and 13 deletions

View File

@ -556,6 +556,16 @@ def http_scheme(self):
if self._downloader.params.get('prefer_insecure', False) if self._downloader.params.get('prefer_insecure', False)
else 'https:') else 'https:')
def _proto_relative_url(self, url, scheme=None):
if url is None:
return url
if url.startswith('//'):
if scheme is None:
scheme = self.http_scheme()
return scheme + url
else:
return url
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -4,9 +4,10 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unified_strdate,
compat_urllib_parse, compat_urllib_parse,
ExtractorError, ExtractorError,
int_or_none,
parse_iso8601,
) )
@ -24,6 +25,10 @@ class MixcloudIE(InfoExtractor):
'uploader': 'Daniel Holbach', 'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach', 'uploader_id': 'dholbach',
'upload_date': '20111115', 'upload_date': '20111115',
'timestamp': 1321359578,
'thumbnail': 're:https?://.*\.jpg',
'view_count': int,
'like_count': int,
}, },
} }
@ -51,10 +56,6 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, track_id) webpage = self._download_webpage(url, track_id)
api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
info = self._download_json(
api_url, track_id, 'Downloading cloudcast info')
preview_url = self._search_regex( preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url') r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/') song_url = preview_url.replace('/previews/', '/c/originals/')
@ -65,16 +66,41 @@ def _real_extract(self, url):
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
final_song_url = self._get_url(template_url) final_song_url = self._get_url(template_url)
if final_song_url is None: if final_song_url is None:
raise ExtractorError(u'Unable to extract track url') raise ExtractorError('Unable to extract track url')
PREFIX = (
r'<div class="cloudcast-play-button-container"'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
thumbnail = self._proto_relative_url(self._html_search_regex(
PREFIX + r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail',
fatal=False))
uploader = self._html_search_regex(
PREFIX + r'm-owner-name="([^"]+)"',
webpage, 'uploader', fatal=False)
uploader_id = self._search_regex(
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = int_or_none(self._search_regex(
r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
webpage, 'like count', fatal=False))
view_count = int_or_none(self._search_regex(
r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
webpage, 'play count', fatal=False))
timestamp = parse_iso8601(self._search_regex(
r'<time itemprop="dateCreated" datetime="([^"]+)">',
webpage, 'upload date'))
return { return {
'id': track_id, 'id': track_id,
'title': info['name'], 'title': title,
'url': final_song_url, 'url': final_song_url,
'description': info.get('description'), 'description': description,
'thumbnail': info['pictures'].get('extra_large'), 'thumbnail': thumbnail,
'uploader': info['user']['name'], 'uploader': uploader,
'uploader_id': info['user']['username'], 'uploader_id': uploader_id,
'upload_date': unified_strdate(info['created_time']), 'timestamp': timestamp,
'view_count': info['play_count'], 'view_count': view_count,
'like_count': like_count,
} }