[xtube] fix extractor (#17)

2024-11-19 01:45:13 +00:00 · 2020-10-28 13:48:55 -07:00 · 2020-10-28 13:48:55 -07:00 · 576d233fe6
commit 576d233fe6
parent ddb77f30ee
1 changed files with 20 additions and 27 deletions
--- a/youtube_dlc/extractor/xtube.py
+++ b/youtube_dlc/extractor/xtube.py
@@ -5,6 +5,7 @@
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    int_or_none,
    js_to_json,
    orderedSet,
@@ -33,27 +34,11 @@ class XTubeIE(InfoExtractor):
            'title': 'strange erotica',
            'description': 'contains:an ET kind of thing',
            'uploader': 'greenshowers',
-            'duration': 450,
+            'duration': 449,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        }
    }, {
        # FLV videos with duplicated formats
        'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
        'md5': 'a406963eb349dd43692ec54631efd88b',
        'info_dict': {
            'id': '9299752',
            'display_id': 'A-Super-Run-Part-1-YT',
            'ext': 'flv',
            'title': 'A Super Run - Part 1 (YT)',
            'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
            'uploader': 'tshirtguy59',
            'duration': 579,
            'view_count': int,
            'comment_count': int,
            'age_limit': 18,
        },
    }, {
        # new URL schema
        'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
@@ -89,16 +74,24 @@ def _real_extract(self, url):
        title, thumbnail, duration = [None] * 3
-        config = self._parse_json(self._search_regex(
+        json_config_string = self._search_regex(
-            r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
+            r'playerConf=({.+?}),loaderConf',
-            default='{}'), video_id, transform_source=js_to_json, fatal=False)
+            webpage, 'config', default=None)
-        if config:
+        if not json_config_string:
-            config = config.get('mainRoll')
+            raise ExtractorError("Could not extract video player data")
-            if isinstance(config, dict):
+
-                title = config.get('title')
+        json_config_string = json_config_string.replace("!0", "true").replace("!1", "false")
-                thumbnail = config.get('poster')
+
-                duration = int_or_none(config.get('duration'))
+        config = self._parse_json(json_config_string, video_id, transform_source=js_to_json, fatal=False)
-                sources = config.get('sources') or config.get('format')
+        if not config:
            raise ExtractorError("Could not extract video player data")
        config = config.get('mainRoll')
        if isinstance(config, dict):
            title = config.get('title')
            thumbnail = config.get('poster')
            duration = int_or_none(config.get('duration'))
            sources = config.get('sources') or config.get('format')
        if not isinstance(sources, dict):
            sources = self._parse_json(self._search_regex(