From 68bb2fef9565159eba4a47f464b6b420cf2d5cda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 1 May 2016 07:15:23 +0600 Subject: [PATCH] [tagesschau] Restrict playlist entry regex --- youtube_dl/extractor/tagesschau.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 499bd260b..136e18f96 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -200,6 +200,10 @@ class TagesschauIE(InfoExtractor): }, { 'url': 'http://www.tagesschau.de/100sekunden/index.html', 'only_matching': True, + }, { + # playlist article with collapsing sections + 'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html', + 'only_matching': True, }] @classmethod @@ -275,7 +279,7 @@ def _real_extract(self, url): if webpage_type == 'website': # Article entries = [] for num, (entry_title, media_kind, download_text) in enumerate(re.findall( - r'(?s)]+class="infotext"[^>]*>.*?(.+?).*?

.*?%s' % DOWNLOAD_REGEX, + r'(?s)]+class="infotext"[^>]*>\s*(?:]+>)?\s*(.+?).*?

.*?%s' % DOWNLOAD_REGEX, webpage), 1): entries.append({ 'id': '%s-%d' % (display_id, num),