mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-10 00:25:06 +00:00
[extractor/redgifs] Fix extractor (#4892)
Closes #4805 Authored by: jhwgh1968
This commit is contained in:
parent
c7f540ea1e
commit
c53e5cf59f
1 changed files with 31 additions and 3 deletions
|
@ -18,6 +18,12 @@ class RedGifsBaseInfoExtractor(InfoExtractor):
|
||||||
'hd': None,
|
'hd': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'referer': 'https://www.redgifs.com/',
|
||||||
|
'origin': 'https://www.redgifs.com',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
}
|
||||||
|
|
||||||
def _parse_gif_data(self, gif_data):
|
def _parse_gif_data(self, gif_data):
|
||||||
video_id = gif_data.get('id')
|
video_id = gif_data.get('id')
|
||||||
quality = qualities(tuple(self._FORMATS.keys()))
|
quality = qualities(tuple(self._FORMATS.keys()))
|
||||||
|
@ -43,7 +49,7 @@ def _parse_gif_data(self, gif_data):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'webpage_url': f'https://redgifs.com/watch/{video_id}',
|
'webpage_url': f'https://redgifs.com/watch/{video_id}',
|
||||||
'ie_key': RedGifsIE.ie_key(),
|
'extractor_key': RedGifsIE.ie_key(),
|
||||||
'extractor': 'RedGifs',
|
'extractor': 'RedGifs',
|
||||||
'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
|
'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
|
||||||
'timestamp': int_or_none(gif_data.get('createDate')),
|
'timestamp': int_or_none(gif_data.get('createDate')),
|
||||||
|
@ -57,9 +63,29 @@ def _parse_gif_data(self, gif_data):
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _fetch_oauth_token(self, video_id):
|
||||||
|
# These pages contain the OAuth token that is necessary to make API calls.
|
||||||
|
index_page = self._download_webpage(f'https://www.redgifs.com/watch/{video_id}', video_id)
|
||||||
|
index_js_uri = self._html_search_regex(
|
||||||
|
r'href="?(/assets/js/index[.a-z0-9]*.js)"?\W', index_page, 'index_js_uri')
|
||||||
|
index_js = self._download_webpage(f'https://www.redgifs.com/{index_js_uri}', video_id)
|
||||||
|
# It turns out that a { followed by any valid JSON punctuation will always result in the
|
||||||
|
# first two characters of the base64 encoding being "ey".
|
||||||
|
# Use this fact to find any such string constant of a reasonable length with the correct
|
||||||
|
# punctuation for an oauth token
|
||||||
|
oauth_token = self._html_search_regex(
|
||||||
|
r'\w+\s*[=:]\s*"(ey[^"]+\.[^"]*\.[^"]{43,45})"', index_js, 'oauth token')
|
||||||
|
self._API_HEADERS['authorization'] = f'Bearer {oauth_token}'
|
||||||
|
|
||||||
def _call_api(self, ep, video_id, *args, **kwargs):
|
def _call_api(self, ep, video_id, *args, **kwargs):
|
||||||
|
if 'authorization' not in self._API_HEADERS:
|
||||||
|
self._fetch_oauth_token(video_id)
|
||||||
|
assert 'authorization' in self._API_HEADERS
|
||||||
|
|
||||||
|
headers = dict(self._API_HEADERS)
|
||||||
|
headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs)
|
f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs)
|
||||||
if 'error' in data:
|
if 'error' in data:
|
||||||
raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
|
raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
|
||||||
return data
|
return data
|
||||||
|
@ -102,6 +128,7 @@ class RedGifsIE(RedGifsBaseInfoExtractor):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'tags': list,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
|
'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
|
||||||
|
@ -117,13 +144,14 @@ class RedGifsIE(RedGifsBaseInfoExtractor):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'tags': list,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).lower()
|
video_id = self._match_id(url).lower()
|
||||||
video_info = self._call_api(
|
video_info = self._call_api(
|
||||||
f'gifs/{video_id}', video_id, note='Downloading video info')
|
f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
|
||||||
return self._parse_gif_data(video_info['gif'])
|
return self._parse_gif_data(video_info['gif'])
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue