[extractor/generic] Extract wistia embed code into separate method

This commit is contained in:
Sergey M․ 2017-05-13 21:51:58 +07:00
parent 7ad4362357
commit 58bb440283
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 27 additions and 25 deletions

View File

@ -88,6 +88,7 @@
from .limelight import LimelightBaseIE
from .anvato import AnvatoIE
from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
class GenericIE(InfoExtractor):
@ -2111,36 +2112,15 @@ def _real_extract(self, url):
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
# Look for embedded Wistia player
match = re.search(
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
embed_url = self._proto_relative_url(
unescapeHTML(match.group('url')))
wistia_url = WistiaIE._extract_url(webpage)
if wistia_url:
return {
'_type': 'url_transparent',
'url': embed_url,
'ie_key': 'Wistia',
'url': self._proto_relative_url(wistia_url),
'ie_key': WistiaIE.ie_key(),
'uploader': video_uploader,
}
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
'url': 'wistia:%s' % match.group('id'),
'ie_key': 'Wistia',
'uploader': video_uploader,
}
match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
''', webpage)
if match:
return self.url_result(self._proto_relative_url(
'wistia:%s' % match.group('id')), 'Wistia')
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:

View File

@ -1,10 +1,13 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
unescapeHTML,
)
@ -34,6 +37,25 @@ class WistiaIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_url(webpage):
match = re.search(
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
if match:
return unescapeHTML(match.group('url'))
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return 'wistia:%s' % match.group('id')
match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
''', webpage)
if match:
return 'wistia:%s' % match.group('id')
def _real_extract(self, url):
video_id = self._match_id(url)