[bbc] Fix BBC Extractor to work with 'School Report'

2024-11-04 23:35:04 +00:00 · 2016-03-11 09:31:35 -06:00 · 2016-03-11 09:31:35 -06:00 · 8e4aa7bf18
commit 8e4aa7bf18
parent a42dfa629e
1 changed file with 8 additions and 7 deletions
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -10,7 +10,6 @@
    int_or_none,
    parse_duration,
    parse_iso8601,
-    remove_end,
    unescapeHTML,
 )
 from ..compat import (
@@ -796,9 +795,15 @@ def _real_extract(self, url):
                            entries.append(self._extract_from_playlist_sxml(
                                playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))

+        playlist_title = self._og_search_title(webpage, default=None)
+        playlist_title = playlist_title or self._html_search_regex(
+            r'<title>(.*?)</title>', webpage, 'playlist title')
+
+        playlist_title = self._search_regex(r'(.+)\s*-\s*BBC', playlist_title, 'title', default=playlist_title)
+
+        playlist_description = self._og_search_description(webpage, default=None)
+
        if entries:
-            playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
-            playlist_description = playlist_description or self._og_search_description(webpage, default=None)
            return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

        # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
@@ -829,10 +834,6 @@ def _real_extract(self, url):
                'subtitles': subtitles,
            }

-        playlist_title = self._html_search_regex(
-            r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
-        playlist_description = self._og_search_description(webpage, default=None)
-
        def extract_all(pattern):
            return list(filter(None, map(
                lambda s: self._parse_json(s, playlist_id, fatal=False),