[extractor/facebook] Fix metadata extraction (#6856)

Closes #3432. Authored by: ringus1
2024-11-26 02:55:17 +00:00 · 2023-05-09 01:19:42 +02:00 · 2023-05-09 01:19:42 +02:00 · 3b52a60688
commit 3b52a60688
parent c449c0655d
1 changed file with 9 additions and 5 deletions
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -390,7 +390,10 @@ def extract_metadata(webpage):
                k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
            title = get_first(media, ('title', 'text'))
            description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
-            uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
+            uploader_data = (
+                get_first(media, ('owner', {dict}))
+                or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
+                or get_first(post, ('node', 'actors', ..., {dict})) or {})

            page_title = title or self._html_search_regex((
                r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
@@ -415,16 +418,17 @@ def extract_metadata(webpage):
            # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/
            if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail):
                thumbnail = None
-            view_count = parse_count(self._search_regex(
-                r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-                default=None))
            info_dict = {
                'description': description,
                'uploader': uploader,
                'uploader_id': uploader_data.get('id'),
                'timestamp': timestamp,
                'thumbnail': thumbnail,
-                'view_count': view_count,
+                'view_count': parse_count(self._search_regex(
+                    (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',),
+                    webpage, 'view count', default=None)),
+                'concurrent_view_count': get_first(post, (
+                    ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
            }

            info_json_ld = self._search_json_ld(webpage, video_id, default={})