0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-26 02:55:17 +00:00

[cliphunter] Fix extraction and update test (Fixes #4362)

This commit is contained in:
Naglis Jonaitis 2015-01-23 21:22:49 +02:00
parent 6896a52721
commit 3e055aa5c3

View file

@ -1,9 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import determine_ext
_translation_table = { _translation_table = {
@ -27,10 +25,10 @@ class CliphunterIE(InfoExtractor):
''' '''
_TEST = { _TEST = {
'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
'md5': 'a2ba71eebf523859fe527a61018f723e', 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
'info_dict': { 'info_dict': {
'id': '1012420', 'id': '1012420',
'ext': 'mp4', 'ext': 'flv',
'title': 'Fun Jynx Maze solo', 'title': 'Fun Jynx Maze solo',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'age_limit': 18, 'age_limit': 18,
@ -44,39 +42,31 @@ def _real_extract(self, url):
video_title = self._search_regex( video_title = self._search_regex(
r'mediaTitle = "([^"]+)"', webpage, 'title') r'mediaTitle = "([^"]+)"', webpage, 'title')
pl_fiji = self._search_regex( fmts = {}
r'pl_fiji = \'([^\']+)\'', webpage, 'video data') for fmt in ('mp4', 'flv'):
pl_c_qual = self._search_regex( fmt_list = self._parse_json(self._search_regex(
r'pl_c_qual = "(.)"', webpage, 'video quality') r'var %sjson\s*=\s*(\[.*?\]);' % fmt, webpage, '%s formats' % fmt), video_id)
video_url = _decode(pl_fiji) for f in fmt_list:
formats = [{ fmts[f['fname']] = _decode(f['sUrl'])
'url': video_url,
'format_id': 'default-%s' % pl_c_qual,
}]
qualities_json = self._search_regex( qualities = self._parse_json(self._search_regex(
r'var pl_qualities\s*=\s*(.*?);\n', webpage, 'quality info') r'var player_btns\s*=\s*(.*?);\n', webpage, 'quality info'), video_id)
qualities_data = json.loads(qualities_json)
for i, t in enumerate( formats = []
re.findall(r"pl_fiji_([a-z0-9]+)\s*=\s*'([^']+')", webpage)): for fname, url in fmts.items():
quality_id, crypted_url = t
video_url = _decode(crypted_url)
f = { f = {
'format_id': quality_id, 'url': url,
'url': video_url,
'quality': i,
} }
if quality_id in qualities_data: if fname in qualities:
qd = qualities_data[quality_id] qual = qualities[fname]
m = re.match( f.update({
r'''(?x)<b>(?P<width>[0-9]+)x(?P<height>[0-9]+)<\\/b> 'format_id': '%s_%sp' % (determine_ext(url), qual['h']),
\s*\(\s*(?P<tbr>[0-9]+)\s*kb\\/s''', qd) 'width': qual['w'],
if m: 'height': qual['h'],
f['width'] = int(m.group('width')) 'tbr': qual['br'],
f['height'] = int(m.group('height')) })
f['tbr'] = int(m.group('tbr'))
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._search_regex( thumbnail = self._search_regex(