From ac184ab742115ba9f8d3365c0f630db53567fc01 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Thu, 17 Feb 2022 15:54:53 +0000 Subject: [PATCH] [bbc] Fix extraction of news articles (#2811) Closes #1374 Authored by: ajj8 --- yt_dlp/extractor/bbc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 85ab478a6..199a3f8e2 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1171,9 +1171,9 @@ def _real_extract(self, url): return self.playlist_result( entries, playlist_id, playlist_title, playlist_description) - initial_data = self._parse_json(self._search_regex( - r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage, - 'preload state', default='{}'), playlist_id, fatal=False) + initial_data = self._parse_json(self._parse_json(self._search_regex( + r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage, + 'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False) if initial_data: def parse_media(media): if not media: @@ -1214,7 +1214,7 @@ def parse_media(media): if name == 'media-experience': parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict)) elif name == 'article': - for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []): + for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []): if block.get('type') != 'media': continue parse_media(block.get('model'))