mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-26 02:55:17 +00:00
[aparat] Add support (Fixes #2012)
This commit is contained in:
parent
768df74538
commit
aa94a6d315
5 changed files with 73 additions and 6 deletions
|
@ -1,6 +1,7 @@
|
||||||
from .academicearth import AcademicEarthCourseIE
|
from .academicearth import AcademicEarthCourseIE
|
||||||
from .addanime import AddAnimeIE
|
from .addanime import AddAnimeIE
|
||||||
from .anitube import AnitubeIE
|
from .anitube import AnitubeIE
|
||||||
|
from .aparat import AparatIE
|
||||||
from .appletrailers import AppleTrailersIE
|
from .appletrailers import AppleTrailersIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
from .ard import ARDIE
|
from .ard import ARDIE
|
||||||
|
|
56
youtube_dl/extractor/aparat.py
Normal file
56
youtube_dl/extractor/aparat.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
#coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AparatIE(InfoExtractor):
    """Information extractor for videos hosted on aparat.com."""

    # Accepts both the short "/v/<id>" page URL and the embed URL form.
    _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'

    _TEST = {
        u'url': u'http://www.aparat.com/v/wP8On',
        u'file': u'wP8On.mp4',
        u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
        u'info_dict': {
            u"title": u"تیم گلکسی 11 - زومیت",
        },
        #u'skip': u'Extremely unreliable',
    }

    def _real_extract(self, url):
        """Build the info dict for the Aparat video referenced by *url*.

        The embed frame page is downloaded and every ``fileList[n]`` entry
        found in it is probed with a HEAD request, in order; the first one
        that yields a response is used as the media URL.

        Raises ExtractorError when none of the listed URLs responds.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Note: There is an easier-to-parse configuration at
        # http://www.aparat.com/video/video/config/videohash/%video_id
        # but the URL in there does not work
        embed_url = (
            u'http://www.aparat.com/video/video/embed/videohash/' +
            video_id + u'/vt/frame')
        webpage = self._download_webpage(embed_url, video_id)

        candidates = re.findall(
            r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
        working_url = None
        for index, candidate in enumerate(candidates):
            # errnote=False asks _request_webpage for a falsy result
            # instead of an error report when the probe fails.
            probe = self._request_webpage(
                HEADRequest(candidate), video_id,
                note=u'Testing video URL %d' % index, errnote=False)
            if probe:
                working_url = candidate
                break
        if working_url is None:
            raise ExtractorError(u'No working video URLs found')

        title = self._search_regex(
            r'\s+title:\s*"([^"]+)"', webpage, u'title')
        # The thumbnail is optional: a missing match must not abort extraction.
        thumbnail = self._search_regex(
            r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)

        info = {
            'id': video_id,
            'title': title,
            'url': working_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }
        return info
|
|
@ -170,6 +170,8 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(url_or_request)
|
return self._downloader.urlopen(url_or_request)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
if errnote is False:
|
||||||
|
return False
|
||||||
if errnote is None:
|
if errnote is None:
|
||||||
errnote = u'Unable to download webpage'
|
errnote = u'Unable to download webpage'
|
||||||
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
@ -109,21 +110,18 @@ def report_following_redirect(self, new_url):
|
||||||
|
|
||||||
def _send_head(self, url):
|
def _send_head(self, url):
|
||||||
"""Check if it is a redirect, like url shorteners, in case return the new url."""
|
"""Check if it is a redirect, like url shorteners, in case return the new url."""
|
||||||
class HeadRequest(compat_urllib_request.Request):
|
|
||||||
def get_method(self):
|
|
||||||
return "HEAD"
|
|
||||||
|
|
||||||
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||||
"""
|
"""
|
||||||
Subclass the HTTPRedirectHandler to make it use our
|
Subclass the HTTPRedirectHandler to make it use our
|
||||||
HeadRequest also on the redirected URL
|
HEADRequest also on the redirected URL
|
||||||
"""
|
"""
|
||||||
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
||||||
if code in (301, 302, 303, 307):
|
if code in (301, 302, 303, 307):
|
||||||
newurl = newurl.replace(' ', '%20')
|
newurl = newurl.replace(' ', '%20')
|
||||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
newheaders = dict((k,v) for k,v in req.headers.items()
|
||||||
if k.lower() not in ("content-length", "content-type"))
|
if k.lower() not in ("content-length", "content-type"))
|
||||||
return HeadRequest(newurl,
|
return HEADRequest(newurl,
|
||||||
headers=newheaders,
|
headers=newheaders,
|
||||||
origin_req_host=req.get_origin_req_host(),
|
origin_req_host=req.get_origin_req_host(),
|
||||||
unverifiable=True)
|
unverifiable=True)
|
||||||
|
@ -152,7 +150,7 @@ def http_error_405(self, req, fp, code, msg, headers):
|
||||||
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
|
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
|
||||||
opener.add_handler(handler())
|
opener.add_handler(handler())
|
||||||
|
|
||||||
response = opener.open(HeadRequest(url))
|
response = opener.open(HEADRequest(url))
|
||||||
if response is None:
|
if response is None:
|
||||||
raise ExtractorError(u'Invalid URL protocol')
|
raise ExtractorError(u'Invalid URL protocol')
|
||||||
return response
|
return response
|
||||||
|
@ -296,6 +294,11 @@ def _real_extract(self, url):
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(mobj.group(1))
|
return OoyalaIE._build_url_result(mobj.group(1))
|
||||||
|
|
||||||
|
# Look for Aparat videos
|
||||||
|
mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(mobj.group(1), 'Aparat')
|
||||||
|
|
||||||
# Start with something easy: JW Player in SWFObject
|
# Start with something easy: JW Player in SWFObject
|
||||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
|
|
|
@ -1093,3 +1093,8 @@ def remove_start(s, start):
|
||||||
def url_basename(url):
    """Return the last segment of the path component of *url*.

    Leading and trailing slashes of the path are ignored, and the query
    string and fragment are not part of the path. An empty string is
    returned when the path has no segments.
    """
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    segments = trimmed_path.split(u'/')
    return segments[-1]
|
||||||
|
|
||||||
|
|
||||||
|
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that is sent with the HTTP ``HEAD`` method."""

    def get_method(self):
        """Return the HTTP method name used for this request."""
        return "HEAD"
|
||||||
|
|
Loading…
Reference in a new issue