From 7198063d96003050eccb0ea59cc938f0388c0606 Mon Sep 17 00:00:00 2001
From: Mister Hat <misterhat144@gmail.com>
Date: Sun, 24 May 2015 15:26:59 -0500
Subject: [PATCH 01/29] [pinkbike] new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/pinkbike.py | 78 ++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 youtube_dl/extractor/pinkbike.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 79bcd9106..80bec39da 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -394,6 +394,7 @@
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .pinkbike import PinkbikeIE
 from .planetaplay import PlanetaPlayIE
 from .pladform import PladformIE
 from .played import PlayedIE
diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
new file mode 100644
index 000000000..4a15c1835
--- /dev/null
+++ b/youtube_dl/extractor/pinkbike.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class PinkbikeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?pinkbike\.com/video/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.pinkbike.com/video/402811/',
+        'md5': '4814b8ca7651034cd87e3361d5c2155a',
+        'info_dict': {
+            'id': '402811',
+            'ext': 'mp4',
+            'title': 'Brandon Semenuk - RAW 100',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'location': 'Victoria, British Columbia, Canada',
+            'uploader_id': 'revelco',
+            'upload_date': '20150406',
+            'description': 'Official release: www.redbull.ca/rupertwalker',
+            'duration': '100'
+        }
+    }, {
+        'url': 'http://www.pinkbike.com/video/406629/',
+        'md5': 'c7a3e19a2bd5cde5a1cda6b2b46caa74',
+        'info_dict': {
+            'id': '406629',
+            'ext': 'mp4',
+            'title': 'Chromag: Reece Wallace in Utah',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'location': 'Whistler, British Columbia, Canada',
+            'uploader_id': 'Chromagbikes',
+            'upload_date': '20150505',
+            'description': 'Reece Wallace shredding Virgin, Utah. Video by Virtu Media.',
+            'duration': '180'
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
+        title = title[:-len(' Video - Pinkbike')]
+
+        description = self._html_search_meta('description', webpage, 'description')
+        description = description[len(title + '. '):]
+
+        uploader_id = self._html_search_regex(r'un:\s*"(.*?)"', webpage, 'uploader_id')
+
+        upload_date = self._html_search_regex(
+            r'class="fullTime"\s*title="([0-9]{4}(?:-[0-9]{2}){2})"',
+            webpage, 'upload_date')
+        upload_date = upload_date.replace('-', '')
+
+        location = self._html_search_regex(
+            r'<dt>Location</dt>\n?\s*<dd>\n?(.*?)\s*<img',
+            webpage, 'location')
+
+        formats = re.findall(
+            r'<source data-quality=\\"([0-9]+)p\\" src=\\"(.*?)\\">',
+            webpage)
+
+        formats = [{'url': fmt[1], 'height': fmt[0]} for fmt in formats]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': self._html_search_meta('video:duration', webpage, 'duration'),
+            'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'),
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+            'location': location,
+            'formats': formats
+        }

From 2c935c0c7224a3332ff9f0fd83e8c074cfbe2c9d Mon Sep 17 00:00:00 2001
From: Mister Hat <misterhat144@gmail.com>
Date: Sun, 24 May 2015 16:30:03 -0500
Subject: [PATCH 02/29] [pinkbike] converted duration to int

---
 youtube_dl/extractor/pinkbike.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index 4a15c1835..66605ddbe 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -20,7 +20,7 @@ class PinkbikeIE(InfoExtractor):
             'uploader_id': 'revelco',
             'upload_date': '20150406',
             'description': 'Official release: www.redbull.ca/rupertwalker',
-            'duration': '100'
+            'duration': 100
         }
     }, {
         'url': 'http://www.pinkbike.com/video/406629/',
@@ -34,7 +34,7 @@ class PinkbikeIE(InfoExtractor):
             'uploader_id': 'Chromagbikes',
             'upload_date': '20150505',
             'description': 'Reece Wallace shredding Virgin, Utah. Video by Virtu Media.',
-            'duration': '180'
+            'duration': 180
         }
     }]
 
@@ -69,7 +69,7 @@ def _real_extract(self, url):
             'id': video_id,
             'title': title,
             'description': description,
-            'duration': self._html_search_meta('video:duration', webpage, 'duration'),
+            'duration': int(self._html_search_meta('video:duration', webpage, 'duration')),
             'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'),
             'uploader_id': uploader_id,
             'upload_date': upload_date,

From 680f9744c4e010ad5111c7711c58c341d5ba24dd Mon Sep 17 00:00:00 2001
From: Mister Hat <misterhat144@gmail.com>
Date: Sun, 24 May 2015 16:45:10 -0500
Subject: [PATCH 03/29] [pinkbike] used proper conversion methods

---
 youtube_dl/extractor/pinkbike.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index 66605ddbe..45c0b1377 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -4,6 +4,11 @@
 import re
 
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    remove_end,
+    remove_start
+)
 
 
 class PinkbikeIE(InfoExtractor):
@@ -43,10 +48,13 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, video_id)
 
         title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
-        title = title[:-len(' Video - Pinkbike')]
+        title = remove_end(title, ' Video - Pinkbike')
 
         description = self._html_search_meta('description', webpage, 'description')
-        description = description[len(title + '. '):]
+        description = remove_start(description, title + '. ')
+
+        duration = int_or_none(self._html_search_meta(
+            'video:duration', webpage, 'duration'))
 
         uploader_id = self._html_search_regex(r'un:\s*"(.*?)"', webpage, 'uploader_id')
 
@@ -63,13 +71,13 @@ def _real_extract(self, url):
             r'<source data-quality=\\"([0-9]+)p\\" src=\\"(.*?)\\">',
             webpage)
 
-        formats = [{'url': fmt[1], 'height': fmt[0]} for fmt in formats]
+        formats = [{'url': fmt[1], 'height': int_or_none(fmt[0])} for fmt in formats]
 
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'duration': int(self._html_search_meta('video:duration', webpage, 'duration')),
+            'duration': duration,
             'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'),
             'uploader_id': uploader_id,
             'upload_date': upload_date,

From c9bebed294dd29d9188265c8f7bfb0e1b43406ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 19 Jun 2015 20:52:44 +0600
Subject: [PATCH 04/29] [youtube] Add itag 59 and 78 (Closes #5979)

---
 youtube_dl/extractor/youtube.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9e2671192..a3da56c14 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -234,6 +234,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '44': {'ext': 'webm', 'width': 854, 'height': 480},
         '45': {'ext': 'webm', 'width': 1280, 'height': 720},
         '46': {'ext': 'webm', 'width': 1920, 'height': 1080},
+        '59': {'ext': 'mp4', 'width': 854, 'height': 480},
+        '78': {'ext': 'mp4', 'width': 854, 'height': 480},
 
 
         # 3d videos

From cbcd1a5474dd8b39e68b0d2bbc493701c655a2d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 19 Jun 2015 21:57:31 +0600
Subject: [PATCH 05/29] [dramafever] Add support for authentication (Closes
 #6017)

---
 youtube_dl/extractor/dramafever.py | 38 ++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index a34aad486..cfbcddcef 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -6,6 +6,8 @@
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
+    compat_urllib_parse,
+    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
@@ -17,7 +19,39 @@
 )
 
 
-class DramaFeverIE(InfoExtractor):
+class DramaFeverBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
+    _NETRC_MACHINE = 'dramafever'
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        (username, password) = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'username': username,
+            'password': password,
+        }
+
+        request = compat_urllib_request.Request(
+            self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+        response = self._download_webpage(
+            request, None, 'Logging in as %s' % username)
+
+        if all(logout_pattern not in response
+               for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
+            error = self._html_search_regex(
+                r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<',
+                response, 'error message', default=None)
+            if error:
+                raise ExtractorError('Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+
+class DramaFeverIE(DramaFeverBaseIE):
     IE_NAME = 'dramafever'
     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
     _TEST = {
@@ -97,7 +131,7 @@ def _real_extract(self, url):
         }
 
 
-class DramaFeverSeriesIE(InfoExtractor):
+class DramaFeverSeriesIE(DramaFeverBaseIE):
     IE_NAME = 'dramafever:series'
     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
     _TESTS = [{

From 10464af5d1d03a3461286a601ae7db91c5a8141c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Fri, 19 Jun 2015 22:02:07 +0600
Subject: [PATCH 06/29] [dramafever:series] Fix extraction while authenticated

---
 youtube_dl/extractor/dramafever.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index cfbcddcef..ca41a3abf 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -185,8 +185,11 @@ def _real_extract(self, url):
                 % (consumer_secret, series_id, self._PAGE_SIZE, page_num),
                 series_id, 'Downloading episodes JSON page #%d' % page_num)
             for episode in episodes.get('value', []):
+                episode_url = episode.get('episode_url')
+                if not episode_url:
+                    continue
                 entries.append(self.url_result(
-                    compat_urlparse.urljoin(url, episode['episode_url']),
+                    compat_urlparse.urljoin(url, episode_url),
                     'DramaFever', episode.get('guid')))
             if page_num == episodes['num_pages']:
                 break

From 385c3e5e91680dcc6573f05e6b30fdf45048503e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 20 Jun 2015 00:10:08 +0600
Subject: [PATCH 07/29] [pinkbike] Improve and simplify

---
 youtube_dl/extractor/pinkbike.py | 88 ++++++++++++++++++--------------
 1 file changed, 49 insertions(+), 39 deletions(-)

diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index 45c0b1377..745433b48 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -7,12 +7,14 @@
 from ..utils import (
     int_or_none,
     remove_end,
-    remove_start
+    remove_start,
+    str_to_int,
+    unified_strdate,
 )
 
 
 class PinkbikeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pinkbike\.com/video/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://www.pinkbike.com/video/402811/',
         'md5': '4814b8ca7651034cd87e3361d5c2155a',
@@ -20,67 +22,75 @@ class PinkbikeIE(InfoExtractor):
             'id': '402811',
             'ext': 'mp4',
             'title': 'Brandon Semenuk - RAW 100',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'location': 'Victoria, British Columbia, Canada',
-            'uploader_id': 'revelco',
-            'upload_date': '20150406',
             'description': 'Official release: www.redbull.ca/rupertwalker',
-            'duration': 100
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 100,
+            'upload_date': '20150406',
+            'uploader': 'revelco',
+            'location': 'Victoria, British Columbia, Canada',
+            'view_count': int,
+            'comment_count': int,
         }
     }, {
-        'url': 'http://www.pinkbike.com/video/406629/',
-        'md5': 'c7a3e19a2bd5cde5a1cda6b2b46caa74',
-        'info_dict': {
-            'id': '406629',
-            'ext': 'mp4',
-            'title': 'Chromag: Reece Wallace in Utah',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'location': 'Whistler, British Columbia, Canada',
-            'uploader_id': 'Chromagbikes',
-            'upload_date': '20150505',
-            'description': 'Reece Wallace shredding Virgin, Utah. Video by Virtu Media.',
-            'duration': 180
-        }
+        'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
-        title = remove_end(title, ' Video - Pinkbike')
+        webpage = self._download_webpage(
+            'http://www.pinkbike.com/video/%s' % video_id, video_id)
 
-        description = self._html_search_meta('description', webpage, 'description')
-        description = remove_start(description, title + '. ')
+        formats = []
+        for _, format_id, src in re.findall(
+            r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
+            height = int_or_none(self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None))
+            formats.append({
+                'url': src,
+                'format_id': format_id,
+                'height': height,
+            })
+        self._sort_formats(formats)
 
+        title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
+        description = self._html_search_regex(
+            r'(?s)id="media-description"[^>]*>(.+?)<',
+            webpage, 'description', default=None) or remove_start(
+            self._og_search_description(webpage), title + '. ')
+        thumbnail = self._og_search_thumbnail(webpage)
         duration = int_or_none(self._html_search_meta(
             'video:duration', webpage, 'duration'))
 
-        uploader_id = self._html_search_regex(r'un:\s*"(.*?)"', webpage, 'uploader_id')
-
-        upload_date = self._html_search_regex(
-            r'class="fullTime"\s*title="([0-9]{4}(?:-[0-9]{2}){2})"',
-            webpage, 'upload_date')
-        upload_date = upload_date.replace('-', '')
+        uploader = self._search_regex(
+            r'un:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
+        upload_date = unified_strdate(self._search_regex(
+            r'class="fullTime"[^>]+title="([^"]+)"',
+            webpage, 'upload date', fatal=False))
 
         location = self._html_search_regex(
-            r'<dt>Location</dt>\n?\s*<dd>\n?(.*?)\s*<img',
-            webpage, 'location')
+            r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
+            webpage, 'location', fatal=False)
 
-        formats = re.findall(
-            r'<source data-quality=\\"([0-9]+)p\\" src=\\"(.*?)\\">',
-            webpage)
+        def extract_count(webpage, label):
+            return str_to_int(self._search_regex(
+                r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label,
+                webpage, label, fatal=False))
 
-        formats = [{'url': fmt[1], 'height': int_or_none(fmt[0])} for fmt in formats]
+        view_count = extract_count(webpage, 'Views')
+        comment_count = extract_count(webpage, 'Comments')
 
         return {
             'id': video_id,
             'title': title,
             'description': description,
+            'thumbnail': thumbnail,
             'duration': duration,
-            'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'),
-            'uploader_id': uploader_id,
             'upload_date': upload_date,
+            'uploader': uploader,
             'location': location,
+            'view_count': view_count,
+            'comment_count': comment_count,
             'formats': formats
         }

From 16d6973f8a9eb5a70c12d82aa40f57c2b4aa8c6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 20 Jun 2015 00:49:28 +0600
Subject: [PATCH 08/29] [viki] Pass session token around (#6005)

---
 youtube_dl/extractor/viki.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 52d10d242..51cdc6b65 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -28,11 +28,15 @@ class VikiBaseIE(InfoExtractor):
 
     _NETRC_MACHINE = 'viki'
 
+    _token = None
+
     def _prepare_call(self, path, timestamp=None, post_data=None):
         path += '?' if '?' not in path else '&'
         if not timestamp:
             timestamp = int(time.time())
         query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+        if self._token:
+            query += '&token=%s' % self._token
         sig = hmac.new(
             self._APP_SECRET.encode('ascii'),
             query.encode('ascii'),
@@ -76,10 +80,14 @@ def _login(self):
             'password': password,
         }
 
-        self._call_api(
+        login = self._call_api(
             'sessions.json', None,
             'Logging in as %s' % username, post_data=login_form)
 
+        self._token = login.get('token')
+        if not self._token:
+            self.report_warning('Unable to get session token, login has probably failed')
+
 
 class VikiIE(VikiBaseIE):
     IE_NAME = 'viki'

From 964afd0689bdd7140b8ab182273d6379fe7b0548 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 03:12:17 +0800
Subject: [PATCH 09/29] [xvideos] Support lower-quality formats found on
 Android

Closes #5968
---
 youtube_dl/extractor/xvideos.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index 2a45dc574..d8415bed4 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -5,10 +5,12 @@
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
+    compat_urllib_request,
 )
 from ..utils import (
     clean_html,
     ExtractorError,
+    determine_ext,
 )
 
 
@@ -25,6 +27,8 @@ class XVideosIE(InfoExtractor):
         }
     }
 
+    _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19'
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
@@ -40,9 +44,30 @@ def _real_extract(self, url):
         video_thumbnail = self._search_regex(
             r'url_bigthumb=(.+?)&amp', webpage, 'thumbnail', fatal=False)
 
+        formats = [{
+            'url': video_url,
+        }]
+
+        android_req = compat_urllib_request.Request(url)
+        android_req.add_header('User-Agent', self._ANDROID_USER_AGENT)
+        android_webpage = self._download_webpage(android_req, video_id, fatal=False)
+
+        if android_webpage is not None:
+            player_params_str = self._search_regex(
+                'mobileReplacePlayerDivTwoQual\(([^)]+)\)',
+                android_webpage, 'player parameters', default='')
+            player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(',')))
+            if player_params:
+                formats.extend([{
+                    'url': param,
+                    'preference': -10,
+                } for param in player_params if determine_ext(param) == 'mp4'])
+
+        self._sort_formats(formats)
+
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'title': video_title,
             'ext': 'flv',
             'thumbnail': video_thumbnail,

From c9ac7fa909fb969ac21a6d168d09803119b018c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 21 Jun 2015 04:17:54 +0600
Subject: [PATCH 10/29] [imdb] Fix extraction

---
 youtube_dl/extractor/imdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index f29df36b5..4bb574cf3 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -46,7 +46,7 @@ def _real_extract(self, url):
             format_info = info['videoPlayerObject']['video']
             formats.append({
                 'format_id': f_id,
-                'url': format_info['url'],
+                'url': format_info['videoInfoList'][0]['videoUrl'],
             })
 
         return {

From 6a745c2c0fa2d627b46f2d4d8013fa69276c4fac Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 18:22:19 +0800
Subject: [PATCH 11/29] [pinkbike] PEP8

---
 youtube_dl/extractor/pinkbike.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py
index 745433b48..a52210fab 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dl/extractor/pinkbike.py
@@ -44,7 +44,7 @@ def _real_extract(self, url):
 
         formats = []
         for _, format_id, src in re.findall(
-            r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
+                r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
             height = int_or_none(self._search_regex(
                 r'^(\d+)[pP]$', format_id, 'height', default=None))
             formats.append({

From b407e173e44041b1a92fb61e316f92d19834a40a Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 18:23:58 +0800
Subject: [PATCH 12/29] [vimeo/generic] Move detection logic from GenericIE to
 VimeoIE

---
 youtube_dl/extractor/generic.py | 16 ++++------------
 youtube_dl/extractor/vimeo.py   | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f6b984300..bf689f531 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -43,6 +43,7 @@
 from .bliptv import BlipTVIE
 from .svt import SVTIE
 from .pornhub import PornHubIE
+from .vimeo import VimeoIE
 
 
 class GenericIE(InfoExtractor):
@@ -1089,18 +1090,9 @@ def _playlist_from_matches(matches, getter=None, ie=None):
         if matches:
             return _playlist_from_matches(matches, ie='RtlNl')
 
-        # Look for embedded (iframe) Vimeo player
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
-        if mobj:
-            player_url = unescapeHTML(mobj.group('url'))
-            surl = smuggle_url(player_url, {'Referer': url})
-            return self.url_result(surl)
-        # Look for embedded (swf embed) Vimeo player
-        mobj = re.search(
-            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
-        if mobj:
-            return self.url_result(mobj.group(1))
+        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
+        if vimeo_url is not None:
+            return self.url_result(vimeo_url)
 
         # Look for embedded YouTube player
         matches = re.findall(r'''(?x)
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index f300c7ca4..cae90205d 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -22,6 +22,7 @@
     unified_strdate,
     unsmuggle_url,
     urlencode_postdata,
+    unescapeHTML,
 )
 
 
@@ -173,6 +174,21 @@ class VimeoIE(VimeoBaseInfoExtractor):
         },
     ]
 
+    @staticmethod
+    def _extract_vimeo_url(url, webpage):
+        # Look for embedded (iframe) Vimeo player
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
+        if mobj:
+            player_url = unescapeHTML(mobj.group('url'))
+            surl = smuggle_url(player_url, {'Referer': url})
+            return surl
+        # Look for embedded (swf embed) Vimeo player
+        mobj = re.search(
+            r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
+        if mobj:
+            return mobj.group(1)
+
     def _verify_video_password(self, url, video_id, webpage):
         password = self._downloader.params.get('videopassword', None)
         if password is None:

From c5895d5dbdc33fbad1c91f448704d7711448220d Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 18:30:38 +0800
Subject: [PATCH 13/29] [tumblr] Support Vimeo embeds (fixes #5969)

---
 youtube_dl/extractor/tumblr.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 63c20310d..9ead13a91 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -5,6 +5,7 @@
 
 from .common import InfoExtractor
 from .pornhub import PornHubIE
+from .vimeo import VimeoIE
 
 
 class TumblrIE(InfoExtractor):
@@ -40,6 +41,17 @@ class TumblrIE(InfoExtractor):
             'timestamp': 1430931613,
         },
         'add_ie': ['Vidme'],
+    }, {
+        'url': 'http://camdamage.tumblr.com/post/98846056295/',
+        'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
+        'info_dict': {
+            'id': '105463834',
+            'ext': 'mp4',
+            'title': 'Cam Damage-HD 720p',
+            'uploader': 'John Moyer',
+            'uploader_id': 'user32021558',
+        },
+        'add_ie': ['Vimeo'],
     }]
 
     def _real_extract(self, url):
@@ -60,6 +72,10 @@ def _real_extract(self, url):
         if pornhub_url:
             return self.url_result(pornhub_url, 'PornHub')
 
+        vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
+        if vimeo_url:
+            return self.url_result(vimeo_url, 'Vimeo')
+
         iframe_url = self._search_regex(
             r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
             webpage, 'iframe url')

From 396726244a9096f142f5420ba5f3a1a36abb9a86 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 18:53:17 +0800
Subject: [PATCH 14/29] [utils/ffmpeg] Move ISO 639 related codes to utils

---
 youtube_dl/postprocessor/ffmpeg.py | 196 +---------------------------
 youtube_dl/utils.py                | 202 +++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+), 194 deletions(-)

diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index cc65b34e7..fe7e0a8ee 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -21,6 +21,7 @@
     shell_quote,
     subtitles_filename,
     dfxp2srt,
+    ISO639Utils,
 )
 
 
@@ -307,199 +308,6 @@ def run(self, information):
 
 
 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
-    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
-    _lang_map = {
-        'aa': 'aar',
-        'ab': 'abk',
-        'ae': 'ave',
-        'af': 'afr',
-        'ak': 'aka',
-        'am': 'amh',
-        'an': 'arg',
-        'ar': 'ara',
-        'as': 'asm',
-        'av': 'ava',
-        'ay': 'aym',
-        'az': 'aze',
-        'ba': 'bak',
-        'be': 'bel',
-        'bg': 'bul',
-        'bh': 'bih',
-        'bi': 'bis',
-        'bm': 'bam',
-        'bn': 'ben',
-        'bo': 'bod',
-        'br': 'bre',
-        'bs': 'bos',
-        'ca': 'cat',
-        'ce': 'che',
-        'ch': 'cha',
-        'co': 'cos',
-        'cr': 'cre',
-        'cs': 'ces',
-        'cu': 'chu',
-        'cv': 'chv',
-        'cy': 'cym',
-        'da': 'dan',
-        'de': 'deu',
-        'dv': 'div',
-        'dz': 'dzo',
-        'ee': 'ewe',
-        'el': 'ell',
-        'en': 'eng',
-        'eo': 'epo',
-        'es': 'spa',
-        'et': 'est',
-        'eu': 'eus',
-        'fa': 'fas',
-        'ff': 'ful',
-        'fi': 'fin',
-        'fj': 'fij',
-        'fo': 'fao',
-        'fr': 'fra',
-        'fy': 'fry',
-        'ga': 'gle',
-        'gd': 'gla',
-        'gl': 'glg',
-        'gn': 'grn',
-        'gu': 'guj',
-        'gv': 'glv',
-        'ha': 'hau',
-        'he': 'heb',
-        'hi': 'hin',
-        'ho': 'hmo',
-        'hr': 'hrv',
-        'ht': 'hat',
-        'hu': 'hun',
-        'hy': 'hye',
-        'hz': 'her',
-        'ia': 'ina',
-        'id': 'ind',
-        'ie': 'ile',
-        'ig': 'ibo',
-        'ii': 'iii',
-        'ik': 'ipk',
-        'io': 'ido',
-        'is': 'isl',
-        'it': 'ita',
-        'iu': 'iku',
-        'ja': 'jpn',
-        'jv': 'jav',
-        'ka': 'kat',
-        'kg': 'kon',
-        'ki': 'kik',
-        'kj': 'kua',
-        'kk': 'kaz',
-        'kl': 'kal',
-        'km': 'khm',
-        'kn': 'kan',
-        'ko': 'kor',
-        'kr': 'kau',
-        'ks': 'kas',
-        'ku': 'kur',
-        'kv': 'kom',
-        'kw': 'cor',
-        'ky': 'kir',
-        'la': 'lat',
-        'lb': 'ltz',
-        'lg': 'lug',
-        'li': 'lim',
-        'ln': 'lin',
-        'lo': 'lao',
-        'lt': 'lit',
-        'lu': 'lub',
-        'lv': 'lav',
-        'mg': 'mlg',
-        'mh': 'mah',
-        'mi': 'mri',
-        'mk': 'mkd',
-        'ml': 'mal',
-        'mn': 'mon',
-        'mr': 'mar',
-        'ms': 'msa',
-        'mt': 'mlt',
-        'my': 'mya',
-        'na': 'nau',
-        'nb': 'nob',
-        'nd': 'nde',
-        'ne': 'nep',
-        'ng': 'ndo',
-        'nl': 'nld',
-        'nn': 'nno',
-        'no': 'nor',
-        'nr': 'nbl',
-        'nv': 'nav',
-        'ny': 'nya',
-        'oc': 'oci',
-        'oj': 'oji',
-        'om': 'orm',
-        'or': 'ori',
-        'os': 'oss',
-        'pa': 'pan',
-        'pi': 'pli',
-        'pl': 'pol',
-        'ps': 'pus',
-        'pt': 'por',
-        'qu': 'que',
-        'rm': 'roh',
-        'rn': 'run',
-        'ro': 'ron',
-        'ru': 'rus',
-        'rw': 'kin',
-        'sa': 'san',
-        'sc': 'srd',
-        'sd': 'snd',
-        'se': 'sme',
-        'sg': 'sag',
-        'si': 'sin',
-        'sk': 'slk',
-        'sl': 'slv',
-        'sm': 'smo',
-        'sn': 'sna',
-        'so': 'som',
-        'sq': 'sqi',
-        'sr': 'srp',
-        'ss': 'ssw',
-        'st': 'sot',
-        'su': 'sun',
-        'sv': 'swe',
-        'sw': 'swa',
-        'ta': 'tam',
-        'te': 'tel',
-        'tg': 'tgk',
-        'th': 'tha',
-        'ti': 'tir',
-        'tk': 'tuk',
-        'tl': 'tgl',
-        'tn': 'tsn',
-        'to': 'ton',
-        'tr': 'tur',
-        'ts': 'tso',
-        'tt': 'tat',
-        'tw': 'twi',
-        'ty': 'tah',
-        'ug': 'uig',
-        'uk': 'ukr',
-        'ur': 'urd',
-        'uz': 'uzb',
-        've': 'ven',
-        'vi': 'vie',
-        'vo': 'vol',
-        'wa': 'wln',
-        'wo': 'wol',
-        'xh': 'xho',
-        'yi': 'yid',
-        'yo': 'yor',
-        'za': 'zha',
-        'zh': 'zho',
-        'zu': 'zul',
-    }
-
-    @classmethod
-    def _conver_lang_code(cls, code):
-        """Convert language code from ISO 639-1 to ISO 639-2/T"""
-        return cls._lang_map.get(code[:2])
-
     def run(self, information):
         if information['ext'] not in ['mp4', 'mkv']:
             self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files')
@@ -525,7 +333,7 @@ def run(self, information):
             opts += ['-c:s', 'mov_text']
         for (i, lang) in enumerate(sub_langs):
             opts.extend(['-map', '%d:0' % (i + 1)])
-            lang_code = self._conver_lang_code(lang)
+            lang_code = ISO639Utils.short2long(lang)
             if lang_code is not None:
                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 52d198fa3..259a9d634 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1879,6 +1879,208 @@ def parse_node(node):
     return ''.join(out)
 
 
+class ISO639Utils(object):
+    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+    _lang_map = {
+        'aa': 'aar',
+        'ab': 'abk',
+        'ae': 'ave',
+        'af': 'afr',
+        'ak': 'aka',
+        'am': 'amh',
+        'an': 'arg',
+        'ar': 'ara',
+        'as': 'asm',
+        'av': 'ava',
+        'ay': 'aym',
+        'az': 'aze',
+        'ba': 'bak',
+        'be': 'bel',
+        'bg': 'bul',
+        'bh': 'bih',
+        'bi': 'bis',
+        'bm': 'bam',
+        'bn': 'ben',
+        'bo': 'bod',
+        'br': 'bre',
+        'bs': 'bos',
+        'ca': 'cat',
+        'ce': 'che',
+        'ch': 'cha',
+        'co': 'cos',
+        'cr': 'cre',
+        'cs': 'ces',
+        'cu': 'chu',
+        'cv': 'chv',
+        'cy': 'cym',
+        'da': 'dan',
+        'de': 'deu',
+        'dv': 'div',
+        'dz': 'dzo',
+        'ee': 'ewe',
+        'el': 'ell',
+        'en': 'eng',
+        'eo': 'epo',
+        'es': 'spa',
+        'et': 'est',
+        'eu': 'eus',
+        'fa': 'fas',
+        'ff': 'ful',
+        'fi': 'fin',
+        'fj': 'fij',
+        'fo': 'fao',
+        'fr': 'fra',
+        'fy': 'fry',
+        'ga': 'gle',
+        'gd': 'gla',
+        'gl': 'glg',
+        'gn': 'grn',
+        'gu': 'guj',
+        'gv': 'glv',
+        'ha': 'hau',
+        'he': 'heb',
+        'hi': 'hin',
+        'ho': 'hmo',
+        'hr': 'hrv',
+        'ht': 'hat',
+        'hu': 'hun',
+        'hy': 'hye',
+        'hz': 'her',
+        'ia': 'ina',
+        'id': 'ind',
+        'ie': 'ile',
+        'ig': 'ibo',
+        'ii': 'iii',
+        'ik': 'ipk',
+        'io': 'ido',
+        'is': 'isl',
+        'it': 'ita',
+        'iu': 'iku',
+        'ja': 'jpn',
+        'jv': 'jav',
+        'ka': 'kat',
+        'kg': 'kon',
+        'ki': 'kik',
+        'kj': 'kua',
+        'kk': 'kaz',
+        'kl': 'kal',
+        'km': 'khm',
+        'kn': 'kan',
+        'ko': 'kor',
+        'kr': 'kau',
+        'ks': 'kas',
+        'ku': 'kur',
+        'kv': 'kom',
+        'kw': 'cor',
+        'ky': 'kir',
+        'la': 'lat',
+        'lb': 'ltz',
+        'lg': 'lug',
+        'li': 'lim',
+        'ln': 'lin',
+        'lo': 'lao',
+        'lt': 'lit',
+        'lu': 'lub',
+        'lv': 'lav',
+        'mg': 'mlg',
+        'mh': 'mah',
+        'mi': 'mri',
+        'mk': 'mkd',
+        'ml': 'mal',
+        'mn': 'mon',
+        'mr': 'mar',
+        'ms': 'msa',
+        'mt': 'mlt',
+        'my': 'mya',
+        'na': 'nau',
+        'nb': 'nob',
+        'nd': 'nde',
+        'ne': 'nep',
+        'ng': 'ndo',
+        'nl': 'nld',
+        'nn': 'nno',
+        'no': 'nor',
+        'nr': 'nbl',
+        'nv': 'nav',
+        'ny': 'nya',
+        'oc': 'oci',
+        'oj': 'oji',
+        'om': 'orm',
+        'or': 'ori',
+        'os': 'oss',
+        'pa': 'pan',
+        'pi': 'pli',
+        'pl': 'pol',
+        'ps': 'pus',
+        'pt': 'por',
+        'qu': 'que',
+        'rm': 'roh',
+        'rn': 'run',
+        'ro': 'ron',
+        'ru': 'rus',
+        'rw': 'kin',
+        'sa': 'san',
+        'sc': 'srd',
+        'sd': 'snd',
+        'se': 'sme',
+        'sg': 'sag',
+        'si': 'sin',
+        'sk': 'slk',
+        'sl': 'slv',
+        'sm': 'smo',
+        'sn': 'sna',
+        'so': 'som',
+        'sq': 'sqi',
+        'sr': 'srp',
+        'ss': 'ssw',
+        'st': 'sot',
+        'su': 'sun',
+        'sv': 'swe',
+        'sw': 'swa',
+        'ta': 'tam',
+        'te': 'tel',
+        'tg': 'tgk',
+        'th': 'tha',
+        'ti': 'tir',
+        'tk': 'tuk',
+        'tl': 'tgl',
+        'tn': 'tsn',
+        'to': 'ton',
+        'tr': 'tur',
+        'ts': 'tso',
+        'tt': 'tat',
+        'tw': 'twi',
+        'ty': 'tah',
+        'ug': 'uig',
+        'uk': 'ukr',
+        'ur': 'urd',
+        'uz': 'uzb',
+        've': 'ven',
+        'vi': 'vie',
+        'vo': 'vol',
+        'wa': 'wln',
+        'wo': 'wol',
+        'xh': 'xho',
+        'yi': 'yid',
+        'yo': 'yor',
+        'za': 'zha',
+        'zh': 'zho',
+        'zu': 'zul',
+    }
+
+    @classmethod
+    def short2long(cls, code):
+        """Convert an ISO 639-1 code (only code[:2] is looked up) to ISO 639-2/T; None if unmapped"""
+        return cls._lang_map.get(code[:2])
+
+    @classmethod
+    def long2short(cls, code):
+        """Convert an ISO 639-2/T code to ISO 639-1 by scanning _lang_map; None if unmapped"""
+        for short_name, long_name in cls._lang_map.items():
+            if long_name == code:
+                return short_name
+
+
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
     def __init__(self, proxies=None):
         # Set default handlers

From 607841af64d308eaf577e528fd7317a8b382b8e6 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 18:55:26 +0800
Subject: [PATCH 15/29] [adobetv] Support embeddable videos (closes #6039)

---
 youtube_dl/extractor/__init__.py |  5 ++-
 youtube_dl/extractor/adobetv.py  | 59 ++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 6c548d8e9..0f4af88f0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -4,7 +4,10 @@
 from .abc7news import Abc7NewsIE
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
-from .adobetv import AdobeTVIE
+from .adobetv import (
+    AdobeTVIE,
+    AdobeTVVideoIE,
+)
 from .adultswim import AdultSwimIE
 from .aftenposten import AftenpostenIE
 from .aftonbladet import AftonbladetIE
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 97d128560..695a4a15c 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -5,6 +5,8 @@
     parse_duration,
     unified_strdate,
     str_to_int,
+    float_or_none,
+    ISO639Utils,
 )
 
 
@@ -69,3 +71,60 @@ def _real_extract(self, url):
             'view_count': view_count,
             'formats': formats,
         }
+
+
+class AdobeTVVideoIE(InfoExtractor):
+    # Handles the embeddable player pages matched by _VALID_URL below
+    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'https://video.tv.adobe.com/v/2456/',
+        'md5': '43662b577c018ad707a63766462b1e87',
+        'info_dict': {
+            'id': '2456',
+            'ext': 'mp4',
+            'title': 'New experience with Acrobat DC',
+            'description': 'New experience with Acrobat DC',
+            'duration': 248.667,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        # The player setup is a JSON literal assigned to the JS variable "bridge"
+        player_params = self._parse_json(self._search_regex(
+            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
+            video_id)
+
+        formats = [{
+            'url': source['src'],
+            'width': source.get('width'),
+            'height': source.get('height'),
+            'tbr': source.get('bitrate'),
+        } for source in player_params['sources']]
+
+        # For both metadata and downloaded files the duration varies among
+        # formats, so pick the largest one (None when no source reports any)
+        durations = [d for d in (
+            float_or_none(source.get('duration'), scale=1000)
+            for source in player_params['sources']) if d]
+        duration = max(durations) if durations else None
+
+        subtitles = {}
+        for translation in player_params.get('translations', []):
+            lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+            subtitles.setdefault(lang_id, []).append({
+                'url': translation['vttPath'],
+                'ext': 'vtt',
+            })
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': player_params['title'],
+            'description': self._og_search_description(webpage),
+            'duration': duration,
+            'subtitles': subtitles,
+        }

From 4e3357717312ac56145ba166a1ae2806f6db8337 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 19:16:59 +0800
Subject: [PATCH 16/29] [utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038
---
 youtube_dl/utils.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 259a9d634..a2746b2d1 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
 
 
 def dfxp2srt(dfxp_data):
-    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+    _x = functools.partial(xpath_with_ns, ns_map={
+        'ttml': 'http://www.w3.org/ns/ttml',
+        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+    })
 
     def parse_node(node):
         str_or_empty = functools.partial(str_or_none, default='')
@@ -1849,9 +1852,9 @@ def parse_node(node):
         out = str_or_empty(node.text)
 
         for child in node:
-            if child.tag in (_x('ttml:br'), 'br'):
+            if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                 out += '\n' + str_or_empty(child.tail)
-            elif child.tag in (_x('ttml:span'), 'span'):
+            elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
                 out += str_or_empty(parse_node(child))
             else:
                 out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1860,7 +1863,7 @@ def parse_node(node):
 
     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
 
     if not paras:
         raise ValueError('Invalid dfxp/TTML subtitle')

From 78294e6a9ce2c9a294d663ac79936df7353b9980 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 19:22:26 +0800
Subject: [PATCH 17/29] [bbccouk] Remove TTML to srt conversion codes

It's broken. See #6038
---
 youtube_dl/extractor/bbccouk.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index 0305f88b5..5825d2867 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -251,26 +251,11 @@ def _get_subtitles(self, media, programme_id):
         for connection in self._extract_connections(media):
             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
-            srt = ''
-
-            def _extract_text(p):
-                if p.text is not None:
-                    stripped_text = p.text.strip()
-                    if stripped_text:
-                        return stripped_text
-                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
-            for pos, p in enumerate(ps):
-                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
             subtitles[lang] = [
                 {
                     'url': connection.get('href'),
                     'ext': 'ttml',
                 },
-                {
-                    'data': srt,
-                    'ext': 'srt',
-                },
             ]
         return subtitles
 

From 756f574e4e7160ca5b39c6e18ec5168beb4a8eb1 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 21:30:34 +0800
Subject: [PATCH 18/29] [dailymotion/generic] Add DailymotionCloudIE

---
 youtube_dl/extractor/__init__.py    |  1 +
 youtube_dl/extractor/dailymotion.py | 42 +++++++++++++++++++++++++++++
 youtube_dl/extractor/generic.py     | 17 ++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0f4af88f0..bd3c3193f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -106,6 +106,7 @@
     DailymotionIE,
     DailymotionPlaylistIE,
     DailymotionUserIE,
+    DailymotionCloudIE,
 )
 from .daum import DaumIE
 from .dbtv import DBTVIE
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 70aa4333c..96f0ed9ad 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -251,3 +251,45 @@ def _real_extract(self, url):
             'title': full_user,
             'entries': self._extract_entries(user),
         }
+
+
+class DailymotionCloudIE(DailymotionBaseInfoExtractor):
+    # Handles Dailymotion Cloud player pages served from api.dmcloud.net/embed/
+    _VALID_URL = r'http://api\.dmcloud\.net/embed/[^/]+/(?P<id>[^/?]+)'
+
+    _TEST = {
+        # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html
+        # Tested at FranceTvInfo_2
+        'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1',
+        'only_matching': True,
+    }
+
+    @classmethod
+    def _extract_dmcloud_url(cls, webpage):
+        """Return the first dmcloud embed URL found in webpage (iframe first, then input), or None"""
+        for pattern in (
+                r'<iframe[^>]+src=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]',
+                r'<input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](http://api\.dmcloud\.net/embed/[^/]+/[^\'"]+)[\'"]'):
+            mobj = re.search(pattern, webpage)
+            if mobj:
+                return mobj.group(1)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(self._build_request(url), video_id)
+
+        title = self._html_search_regex(r'<title>([^>]+)</title>', webpage, 'title')
+        # The video data is a JSON literal assigned to the JS variable "info"
+        video_info = self._parse_json(self._search_regex(
+            r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id)
+
+        # TODO: parse ios_url, which is in fact a manifest
+        video_url = video_info['mp4_url']
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'thumbnail': video_info.get('thumbnail_url'),
+        }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index bf689f531..07939b196 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -44,6 +44,7 @@
 from .svt import SVTIE
 from .pornhub import PornHubIE
 from .vimeo import VimeoIE
+from .dailymotion import DailymotionCloudIE
 
 
 class GenericIE(InfoExtractor):
@@ -813,6 +814,17 @@ class GenericIE(InfoExtractor):
                 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
                 'uploader': 'Rogers Sportsnet',
             },
+        },
+        # Dailymotion Cloud video
+        {
+            'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
+            'md5': '49444254273501a64675a7e68c502681',
+            'info_dict': {
+                'id': '5585de919473990de4bee11b',
+                'ext': 'mp4',
+                'title': 'Le débat',
+                'thumbnail': 're:^https?://.*\.jpe?g$',
+            }
         }
     ]
 
@@ -1486,6 +1498,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
         if senate_isvp_url:
             return self.url_result(senate_isvp_url, 'SenateISVP')
 
+        # Look for Dailymotion Cloud videos
+        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
+        if dmcloud_url:
+            return self.url_result(dmcloud_url, 'DailymotionCloud')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True

From 6f96e308d0fa7674ac88e1e80fc602413f9a6b31 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 21:31:33 +0800
Subject: [PATCH 19/29] [francetvinfo.fr] Support dmcloud embeds (fixes #6034)

---
 youtube_dl/extractor/francetv.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index db0bbec1e..b2c984bf2 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -18,6 +18,7 @@
     parse_duration,
     determine_ext,
 )
+from .dailymotion import DailymotionCloudIE
 
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
@@ -131,12 +132,26 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
             'skip_download': 'HLS (reqires ffmpeg)'
         },
         'skip': 'Ce direct est terminé et sera disponible en rattrapage dans quelques minutes.',
+    }, {
+        'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
+        'md5': 'f485bda6e185e7d15dbc69b72bae993e',
+        'info_dict': {
+            'id': '556e03339473995ee145930c',
+            'ext': 'mp4',
+            'title': 'Les entreprises familiales : le secret de la réussite',
+            'thumbnail': 're:^https?://.*\.jpe?g$',
+        }
     }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         page_title = mobj.group('title')
         webpage = self._download_webpage(url, page_title)
+
+        dmcloud_url = DailymotionCloudIE._extract_dmcloud_url(webpage)
+        if dmcloud_url:
+            return self.url_result(dmcloud_url, 'DailymotionCloud')
+
         video_id, catalogue = self._search_regex(
             r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@')
         return self._extract_video(video_id, catalogue)

From 3f3308cd75fc068e4d67d00aa7d7892e02ab16e9 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 21 Jun 2015 23:29:40 +0800
Subject: [PATCH 20/29] Revert "[sohu] Update extractor"

This reverts commit 32060c6d6b618fa858b2ce43db34d02fd43bc542.
---
 youtube_dl/extractor/sohu.py | 46 ++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 29bd9ce6f..7644cc02d 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -8,7 +8,10 @@
     compat_str,
     compat_urllib_request
 )
-from ..utils import ExtractorError
+from ..utils import (
+    sanitize_url_path_consecutive_slashes,
+    ExtractorError,
+)
 
 
 class SohuIE(InfoExtractor):
@@ -26,7 +29,7 @@ class SohuIE(InfoExtractor):
         'skip': 'On available in China',
     }, {
         'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
-        'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
+        'md5': '699060e75cf58858dd47fb9c03c42cfb',
         'info_dict': {
             'id': '409385080',
             'ext': 'mp4',
@@ -34,7 +37,7 @@ class SohuIE(InfoExtractor):
         }
     }, {
         'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
-        'md5': '49308ff6dafde5ece51137d04aec311e',
+        'md5': '9bf34be48f2f4dadcb226c74127e203c',
         'info_dict': {
             'id': '78693464',
             'ext': 'mp4',
@@ -48,7 +51,7 @@ class SohuIE(InfoExtractor):
             'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
         },
         'playlist': [{
-            'md5': '492923eac023ba2f13ff69617c32754a',
+            'md5': 'bdbfb8f39924725e6589c146bc1883ad',
             'info_dict': {
                 'id': '78910339_part1',
                 'ext': 'mp4',
@@ -56,7 +59,7 @@ class SohuIE(InfoExtractor):
                 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
             }
         }, {
-            'md5': 'de604848c0e8e9c4a4dde7e1347c0637',
+            'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
             'info_dict': {
                 'id': '78910339_part2',
                 'ext': 'mp4',
@@ -64,7 +67,7 @@ class SohuIE(InfoExtractor):
                 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
             }
         }, {
-            'md5': '93584716ee0657c0b205b8aa3d27aa13',
+            'md5': '8407e634175fdac706766481b9443450',
             'info_dict': {
                 'id': '78910339_part3',
                 'ext': 'mp4',
@@ -139,21 +142,24 @@ def _fetch_data(vid_id, mytv=False):
         for i in range(part_count):
             formats = []
             for format_id, format_data in formats_json.items():
-                data = format_data['data']
+                allot = format_data['allot']
+                prot = format_data['prot']
 
-                # URLs starts with http://newflv.sohu.ccgslb.net/ is not usable
-                # so retry until got a working URL
-                video_url = 'newflv.sohu.ccgslb.net'
-                retries = 0
-                while 'newflv.sohu.ccgslb.net' in video_url and retries < 5:
-                    download_note = 'Download information from CDN gateway for format ' + format_id
-                    if retries > 0:
-                        download_note += ' (retry #%d)' % retries
-                    retries += 1
-                    cdn_info = self._download_json(
-                        'http://data.vod.itc.cn/cdnList?new=' + data['su'][i],
-                        video_id, download_note)
-                    video_url = cdn_info['url']
+                data = format_data['data']
+                clips_url = data['clipsURL']
+                su = data['su']
+
+                part_str = self._download_webpage(
+                    'http://%s/?prot=%s&file=%s&new=%s' %
+                    (allot, prot, clips_url[i], su[i]),
+                    video_id,
+                    'Downloading %s video URL part %d of %d'
+                    % (format_id, i + 1, part_count))
+
+                part_info = part_str.split('|')
+
+                video_url = sanitize_url_path_consecutive_slashes(
+                    '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
 
                 formats.append({
                     'url': video_url,

From 98ca102441624c2a1a66114c32e0142899f78dd3 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 22 Jun 2015 00:59:55 +0800
Subject: [PATCH 21/29] [sohu] Fix extraction again

---
 youtube_dl/extractor/sohu.py | 42 +++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 7644cc02d..ba2d5e19b 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -6,10 +6,10 @@
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_urllib_request
+    compat_urllib_request,
+    compat_urllib_parse,
 )
 from ..utils import (
-    sanitize_url_path_consecutive_slashes,
     ExtractorError,
 )
 
@@ -143,23 +143,41 @@ def _fetch_data(vid_id, mytv=False):
             formats = []
             for format_id, format_data in formats_json.items():
                 allot = format_data['allot']
-                prot = format_data['prot']
 
                 data = format_data['data']
                 clips_url = data['clipsURL']
                 su = data['su']
 
-                part_str = self._download_webpage(
-                    'http://%s/?prot=%s&file=%s&new=%s' %
-                    (allot, prot, clips_url[i], su[i]),
-                    video_id,
-                    'Downloading %s video URL part %d of %d'
-                    % (format_id, i + 1, part_count))
+                video_url = 'newflv.sohu.ccgslb.net'
+                cdnId = None
+                retries = 0
 
-                part_info = part_str.split('|')
+                while 'newflv.sohu.ccgslb.net' in video_url:
+                    params = {
+                        'prot': 9,
+                        'file': clips_url[i],
+                        'new': su[i],
+                        'prod': 'flash',
+                    }
 
-                video_url = sanitize_url_path_consecutive_slashes(
-                    '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
+                    if cdnId is not None:
+                        params['idc'] = cdnId
+
+                    download_note = 'Downloading %s video URL part %d of %d' % (
+                        format_id, i + 1, part_count)
+
+                    if retries > 0:
+                        download_note += ' (retry #%d)' % retries
+                    part_info = self._parse_json(self._download_webpage(
+                        'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)),
+                        video_id, download_note), video_id)
+
+                    video_url = part_info['url']
+                    cdnId = part_info.get('nid')
+
+                    retries += 1
+                    if retries > 5:
+                        raise ExtractorError('Failed to get video URL')
 
                 formats.append({
                     'url': video_url,

From 0bbba43ed0c68b612fcafbdad460a93b733b9f87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 21 Jun 2015 23:10:38 +0600
Subject: [PATCH 22/29] [xhamster:embed] Add extractor (Closes #6032)

---
 youtube_dl/extractor/__init__.py |  5 ++++-
 youtube_dl/extractor/xhamster.py | 28 +++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bd3c3193f..dc1a302e6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -701,7 +701,10 @@
 from .wsj import WSJIE
 from .xbef import XBefIE
 from .xboxclips import XboxClipsIE
-from .xhamster import XHamsterIE
+from .xhamster import (
+    XHamsterIE,
+    XHamsterEmbedIE,
+)
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 4527567f8..ae3c5962b 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -13,7 +13,6 @@
 
 
 class XHamsterIE(InfoExtractor):
-    """Information Extractor for xHamster"""
     _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
     _TESTS = [
         {
@@ -133,3 +132,30 @@ def is_hd(webpage):
             'age_limit': age_limit,
             'formats': formats,
         }
+
+
+class XHamsterEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://xhamster.com/xembed.php?video=3328539',
+        'info_dict': {
+            'id': '3328539',
+            'ext': 'mp4',
+            'title': 'Pen Masturbation',
+            'upload_date': '20140728',
+            'uploader_id': 'anonymous',
+            'duration': 5,
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        # The embed page links to the full video page; extraction is delegated to it
+        video_url = self._search_regex(
+            r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
+            webpage, 'xhamster url')
+
+        return self.url_result(video_url, 'XHamster')

From 2bb5b6d0a1671957c7a2e6d6433901b2a1b8f48f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 21 Jun 2015 23:11:25 +0600
Subject: [PATCH 23/29] [generic] Add support for xhamster embeds

---
 youtube_dl/extractor/generic.py  | 6 ++++++
 youtube_dl/extractor/xhamster.py | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 07939b196..e108bde66 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -43,6 +43,7 @@
 from .bliptv import BlipTVIE
 from .svt import SVTIE
 from .pornhub import PornHubIE
+from .xhamster import XHamsterEmbedIE
 from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 
@@ -1331,6 +1332,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
         if pornhub_url:
             return self.url_result(pornhub_url, 'PornHub')
 
+        # Look for embedded XHamster player
+        xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+        if xhamster_urls:
+            return _playlist_from_matches(xhamster_urls, ie='XHamsterEmbed')
+
         # Look for embedded Tvigle player
         mobj = re.search(
             r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index ae3c5962b..725e01ac3 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -149,6 +149,12 @@ class XHamsterEmbedIE(InfoExtractor):
         }
     }
 
+    @staticmethod
+    def _extract_urls(webpage):  # -> list of xembed.php URLs used as <iframe> src in webpage
+        return [url for _, url in re.findall(  # "_" is the captured opening quote
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
+            webpage)]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 

From c76799c5553b3b48bb7cc73dec452c3637a8670a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 21 Jun 2015 23:18:28 +0600
Subject: [PATCH 24/29] [extractor/generic] Add test for xhamster embed

---
 youtube_dl/extractor/generic.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index e108bde66..20fcd8170 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -336,6 +336,15 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        # XHamster embed
+        {
+            'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+            'info_dict': {
+                'id': 'showthread',
+                'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+            },
+            'playlist_mincount': 7,
+        },
         # Embedded TED video
         {
             'url': 'http://en.support.wordpress.com/videos/ted-talks/',

From a5158f38a31e863a39de8f66c26469a5d4469280 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 22 Jun 2015 15:02:53 +0800
Subject: [PATCH 25/29] [generic/adobetv] Support AdobeTVVideo embeds (#6039)

---
 youtube_dl/extractor/adobetv.py |  1 +
 youtube_dl/extractor/generic.py | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 695a4a15c..5e43adc51 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -77,6 +77,7 @@ class AdobeTVVideoIE(InfoExtractor):
     _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
 
     _TEST = {
+        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
         'url': 'https://video.tv.adobe.com/v/2456/',
         'md5': '43662b577c018ad707a63766462b1e87',
         'info_dict': {
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 20fcd8170..5c03fddc6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -835,6 +835,18 @@ class GenericIE(InfoExtractor):
                 'title': 'Le débat',
                 'thumbnail': 're:^https?://.*\.jpe?g$',
             }
+        },
+        # AdobeTVVideo embed
+        {
+            'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+            'md5': '43662b577c018ad707a63766462b1e87',
+            'info_dict': {
+                'id': '2456',
+                'ext': 'mp4',
+                'title': 'New experience with Acrobat DC',
+                'description': 'New experience with Acrobat DC',
+                'duration': 248.667,
+            },
         }
     ]
 
@@ -1518,6 +1530,15 @@ def _playlist_from_matches(matches, getter=None, ie=None):
         if dmcloud_url:
             return self.url_result(dmcloud_url, 'DailymotionCloud')
 
+        # Look for AdobeTVVideo embeds (src closed by the same quote that opened it)
+        mobj = re.search(
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+(?:(?!\1).)*)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(
+                self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+                'AdobeTVVideo')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True

From 25701d5a2ca8c7a58c91e11c6a30d4e61b02e89c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Mon, 22 Jun 2015 11:18:52 +0200
Subject: [PATCH 26/29] [xhamster] pep8: remove trailing ';'

---
 youtube_dl/extractor/xhamster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 725e01ac3..b4ad513a0 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -164,4 +164,4 @@ def _real_extract(self, url):
             r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
             webpage, 'xhamster url')
 
-        return self.url_result(video_url, 'XHamster');
+        return self.url_result(video_url, 'XHamster')

From 255f5694aabe07fa7f33978c6b97ced469e172db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 22 Jun 2015 20:11:15 +0600
Subject: [PATCH 27/29] [faz] Extend _VALID_URL (Closes #6050)

---
 youtube_dl/extractor/faz.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index 3c39ca451..cebdd0193 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -6,9 +6,9 @@
 
 class FazIE(InfoExtractor):
     IE_NAME = 'faz.net'
-    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
         'info_dict': {
             'id': '12610585',
@@ -16,7 +16,22 @@ class FazIE(InfoExtractor):
             'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
             'description': 'md5:1453fbf9a0d041d985a47306192ea253',
         },
-    }
+    }, {
+        'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.faz.net/-13659345.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.faz.net/foobarblafasel-13659345.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)

From e20d0c1e69f66a82dd493680351538ea92393fd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Tue, 23 Jun 2015 21:34:29 +0600
Subject: [PATCH 28/29] [brightcove] Use `compat_xml_parse_error` (Closes
 #6060)

---
 youtube_dl/extractor/brightcove.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index d768f99e6..4721c2293 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -13,6 +13,7 @@
     compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urlparse,
+    compat_xml_parse_error,
 )
 from ..utils import (
     determine_ext,
@@ -119,7 +120,7 @@ def _build_brighcove_url(cls, object_str):
 
         try:
             object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
-        except xml.etree.ElementTree.ParseError:
+        except compat_xml_parse_error:
             return
 
         fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')

From 18b5e1e5348ba3a6d1b6a98e97217eebb3d32a1e Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Wed, 24 Jun 2015 16:00:12 +0800
Subject: [PATCH 29/29] [drbonanza] Fix extraction of videos

---
 youtube_dl/extractor/drbonanza.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dl/extractor/drbonanza.py
index 7626219ba..8b98b013a 100644
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dl/extractor/drbonanza.py
@@ -15,7 +15,6 @@ class DRBonanzaIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.dr.dk/bonanza/serie/portraetter/Talkshowet.htm?assetId=65517',
-        'md5': 'fe330252ddea607635cf2eb2c99a0af3',
         'info_dict': {
             'id': '65517',
             'ext': 'mp4',
@@ -26,6 +25,9 @@ class DRBonanzaIE(InfoExtractor):
             'upload_date': '20110120',
             'duration': 3664,
         },
+        'params': {
+            'skip_download': True,  # requires rtmp
+        },
     }, {
         'url': 'http://www.dr.dk/bonanza/radio/serie/sport/fodbold.htm?assetId=59410',
         'md5': '6dfe039417e76795fb783c52da3de11d',
@@ -93,6 +95,11 @@ def parse_filename_info(url):
                         'format_id': file['Type'].replace('Video', ''),
                         'preference': preferencemap.get(file['Type'], -10),
                     })
+                    if format['url'].startswith('rtmp'):
+                        rtmp_url = format['url']
+                        format['rtmp_live'] = True  # flagged live because --resume does not work
+                        if '/bonanza/' in rtmp_url:
+                            format['play_path'] = rtmp_url.split('/bonanza/', 1)[1]  # keep everything after the first '/bonanza/'
                     formats.append(format)
                 elif file['Type'] == "Thumb":
                     thumbnail = file['Location']
@@ -111,9 +118,6 @@ def parse_filename_info(url):
         description = '%s\n%s\n%s\n' % (
             info['Description'], info['Actors'], info['Colophon'])
 
-        for f in formats:
-            f['url'] = f['url'].replace('rtmp://vod-bonanza.gss.dr.dk/bonanza/', 'http://vodfiles.dr.dk/')
-            f['url'] = f['url'].replace('mp4:bonanza', 'bonanza')
         self._sort_formats(formats)
 
         display_id = re.sub(r'[^\w\d-]', '', re.sub(r' ', '-', title.lower())) + '-' + asset_id