[ie/patreon] Fix comments extraction (#11530)

Closes #11483
Authored by: jshumphrey, bashonly
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-11-21 02:05:12 +00:00 · 2024-11-16 14:02:14 -06:00 · 2024-11-16 14:02:14 -06:00 · 1d253b0a27
commit 1d253b0a27
parent 720b3dc453
1 changed file with 35 additions and 16 deletions
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@ -16,10 +16,10 @@
    parse_iso8601,
    smuggle_url,
    str_or_none,
    traverse_obj,
    url_or_none,
    urljoin,
 )
 from ..utils.traversal import traverse_obj, value
 class PatreonBaseIE(InfoExtractor):
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
            'thumbnail': r're:^https?://.+',
        },
        'skip': 'Patron-only content',
    }, {
        # Contains a comment reply in the 'included' section
        'url': 'https://www.patreon.com/posts/114721679',
        'info_dict': {
            'id': '114721679',
            'ext': 'mp4',
            'upload_date': '20241025',
            'uploader': 'Japanalysis',
            'like_count': int,
            'thumbnail': r're:^https?://.+',
            'comment_count': int,
            'title': 'Karasawa Part 2',
            'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
            'uploader_url': 'https://www.patreon.com/japanalysis',
            'uploader_id': '80504268',
            'channel_url': 'https://www.patreon.com/japanalysis',
            'channel_follower_count': int,
            'timestamp': 1729897015,
            'channel_id': '9346307',
        },
        'params': {'getcomments': True},
    }]
    _RETURN_TYPE = 'video'
@ -404,26 +425,24 @@ def _get_comments(self, post_id):
                f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
            cursor = None
-            for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
+            for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
                count += 1
                comment_id = comment.get('id')
                attributes = comment.get('attributes') or {}
                if comment_id is None:
                    continue
                author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
                author_info = traverse_obj(
                    response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
                    get_all=False, expected_type=dict, default={})
                yield {
-                    'id': comment_id,
+                    **traverse_obj(comment, {
-                    'text': attributes.get('body'),
+                        'id': ('id', {str_or_none}),
-                    'timestamp': parse_iso8601(attributes.get('created')),
+                        'text': ('attributes', 'body', {str}),
-                    'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
+                        'timestamp': ('attributes', 'created', {parse_iso8601}),
-                    'author_is_uploader': attributes.get('is_by_creator'),
+                        'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
                        'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
                    }),
                    **traverse_obj(response, (
                        'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
                            'author': ('full_name', {str}),
                            'author_thumbnail': ('image_url', {url_or_none}),
                        }), get_all=False),
                    'author_id': author_id,
                    'author': author_info.get('full_name'),
                    'author_thumbnail': author_info.get('image_url'),
                }
            if count < traverse_obj(response, ('meta', 'count')):