[streamcloud] Add IE (Fixes #1801)

2024-11-30 03:33:02 +00:00 · 2013-11-22 17:19:22 +01:00 · 2013-11-22 17:19:22 +01:00 · 02e4ebbbad
commit 02e4ebbbad
parent ab009f59ef
2 changed files with 66 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -128,6 +128,7 @@
 from .stanfordoc import StanfordOpenClassroomIE
 from .statigram import StatigramIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
 from .sztvhu import SztvHuIE
 from .teamcoco import TeamcocoIE
 from .techtalks import TechTalksIE
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -0,0 +1,65 @@
 # coding: utf-8
 import re
 import time
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
 )
 class StreamcloudIE(InfoExtractor):
    """Information extractor for video pages on streamcloud.eu.

    The landing page contains a form of hidden/submit inputs.  The
    extractor re-submits those inputs as a POST to the same URL after a
    pause and scrapes the title, media URL, duration and thumbnail from
    the page returned by that POST.
    """

    IE_NAME = u'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video  \'/\\ ä ↭',
            u'duration': 9,
        },
    }

    # Seconds to sleep between fetching the landing page and re-submitting
    # its form.  NOTE(review): presumably the site enforces a countdown
    # before the form may be posted -- confirm the exact value needed.
    _WAIT_SECONDS = 12

    def _real_extract(self, url):
        """Extract the video information for a streamcloud.eu page.

        Args:
            url: page URL; it is matched against ``_VALID_URL`` to obtain
                the video id.

        Returns:
            A dict with the keys ``id``, ``title``, ``url``, ``duration``
            and ``thumbnail``.  ``duration`` is an int, or None when the
            non-fatal duration search yields None; ``thumbnail`` is
            whatever the non-fatal thumbnail search returns.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        orig_webpage = self._download_webpage(url, video_id)

        # Collect (name, value) pairs of every hidden/submit input so the
        # form can be replayed unchanged.
        fields = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', orig_webpage)

        # POST data has to be bytes: Python 3's urllib.request refuses a
        # text body.  urlencode() only emits percent-encoded ASCII, so the
        # ASCII encoding cannot fail and is harmless on Python 2 as well.
        # (Assumes compat_urllib_request aliases urllib.request/urllib2.)
        post = compat_urllib_parse.urlencode(fields).encode('ascii')

        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(self._WAIT_SECONDS)

        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')

        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')

        # Duration and thumbnail are optional: a failed search must not
        # abort the extraction.
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        duration = None if duration_str is None else int(duration_str)
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }