From 298d9c0e891b1a0fbc3ec6d3674ff6fbc550d6ec Mon Sep 17 00:00:00 2001 From: Djeson <61365937+DjesonPV@users.noreply.github.com> Date: Sun, 7 Aug 2022 22:21:53 +0200 Subject: [PATCH] [extractor/ninegag] Extract uploader (#4597) Closes #4587 Authored by: DjesonPV --- yt_dlp/extractor/ninegag.py | 45 +++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 00ca95ea2..86e710f2b 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -3,7 +3,7 @@ ExtractorError, determine_ext, int_or_none, - try_get, + traverse_obj, unescapeHTML, url_or_none, ) @@ -11,18 +11,20 @@ class NineGagIE(InfoExtractor): IE_NAME = '9gag' + IE_DESC = '9GAG' _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)' _TESTS = [{ 'url': 'https://9gag.com/gag/ae5Ag7B', 'info_dict': { 'id': 'ae5Ag7B', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Capybara Agility Training', 'upload_date': '20191108', 'timestamp': 1573237208, + 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg', 'categories': ['Awesome'], - 'tags': ['Weimaraner', 'American Pit Bull Terrier'], + 'tags': ['Awesome'], 'duration': 44, 'like_count': int, 'dislike_count': int, @@ -32,6 +34,26 @@ class NineGagIE(InfoExtractor): # HTML escaped title 'url': 'https://9gag.com/gag/av5nvyb', 'only_matching': True, + }, { + # Non Anonymous Uploader + 'url': 'https://9gag.com/gag/ajgp66G', + 'info_dict': { + 'id': 'ajgp66G', + 'ext': 'webm', + 'title': 'Master Shifu! Or Splinter! 
You decide:', + 'upload_date': '20220806', + 'timestamp': 1659803411, + 'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg', + 'categories': ['Funny'], + 'tags': ['Funny'], + 'duration': 26, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'uploader': 'Peter Klaus', + 'uploader_id': 'peterklaus12', + 'uploader_url': 'https://9gag.com/u/peterklaus12', + } }] def _real_extract(self, url): @@ -46,8 +68,6 @@ def _real_extract(self, url): 'The given url does not contain a video', expected=True) - title = unescapeHTML(post['title']) - duration = None formats = [] thumbnails = [] @@ -98,7 +118,7 @@ def _real_extract(self, url): formats.append(common) self._sort_formats(formats) - section = try_get(post, lambda x: x['postSection']['name']) + section = traverse_obj(post, ('postSection', 'name')) tags = None post_tags = post.get('tags') @@ -110,18 +130,19 @@ def _real_extract(self, url): continue tags.append(tag_key) - get_count = lambda x: int_or_none(post.get(x + 'Count')) - return { 'id': post_id, - 'title': title, + 'title': unescapeHTML(post.get('title')), 'timestamp': int_or_none(post.get('creationTs')), 'duration': duration, + 'uploader': traverse_obj(post, ('creator', 'fullName')), + 'uploader_id': traverse_obj(post, ('creator', 'username')), + 'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))), 'formats': formats, 'thumbnails': thumbnails, - 'like_count': get_count('upVote'), - 'dislike_count': get_count('downVote'), - 'comment_count': get_count('comments'), + 'like_count': int_or_none(post.get('upVoteCount')), + 'dislike_count': int_or_none(post.get('downVoteCount')), + 'comment_count': int_or_none(post.get('commentsCount')), 'age_limit': 18 if post.get('nsfw') == 1 else None, 'categories': [section] if section else None, 'tags': tags,