mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-19 05:29:59 +00:00
[ie/patreon] Extract all m3u8 formats for locked posts (#11138)
Closes #11125
Authored by: bashonly
This commit is contained in:
parent
b31b81d85f
commit
f91645acea
1 changed file with 21 additions and 7 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
@ -22,13 +23,19 @@
|
||||||
|
|
||||||
|
|
||||||
class PatreonBaseIE(InfoExtractor):
|
class PatreonBaseIE(InfoExtractor):
|
||||||
USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
|
@functools.cached_property
|
||||||
|
def patreon_user_agent(self):
|
||||||
|
# Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection.
|
||||||
|
# Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in
|
||||||
|
if self._get_cookies('https://www.patreon.com/').get('session_id'):
|
||||||
|
return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)'
|
||||||
|
return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)'
|
||||||
|
|
||||||
def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
|
def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None):
|
||||||
if headers is None:
|
if headers is None:
|
||||||
headers = {}
|
headers = {}
|
||||||
if 'User-Agent' not in headers:
|
if 'User-Agent' not in headers:
|
||||||
headers['User-Agent'] = self.USER_AGENT
|
headers['User-Agent'] = self.patreon_user_agent
|
||||||
if query:
|
if query:
|
||||||
query.update({'json-api-version': 1.0})
|
query.update({'json-api-version': 1.0})
|
||||||
|
|
||||||
|
@ -111,6 +118,7 @@ class PatreonIE(PatreonBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'channel_is_verified': True,
|
'channel_is_verified': True,
|
||||||
'chapters': 'count:4',
|
'chapters': 'count:4',
|
||||||
|
'timestamp': 1423689666,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'noplaylist': True,
|
'noplaylist': True,
|
||||||
|
@ -221,6 +229,7 @@ class PatreonIE(PatreonBaseIE):
|
||||||
'thumbnail': r're:^https?://.+',
|
'thumbnail': r're:^https?://.+',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||||
}, {
|
}, {
|
||||||
# multiple attachments/embeds
|
# multiple attachments/embeds
|
||||||
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
|
'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
|
||||||
|
@ -326,8 +335,13 @@ def _real_extract(self, url):
|
||||||
if embed_url and (urlh := self._request_webpage(
|
if embed_url and (urlh := self._request_webpage(
|
||||||
embed_url, video_id, 'Checking embed URL', headers=headers,
|
embed_url, video_id, 'Checking embed URL', headers=headers,
|
||||||
fatal=False, errnote=False, expected_status=403)):
|
fatal=False, errnote=False, expected_status=403)):
|
||||||
|
# Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need
|
||||||
|
# to check for "Sorry, we couldn’t find that page" in the meta description tag
|
||||||
|
meta_description = clean_html(self._html_search_meta(
|
||||||
|
'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None))
|
||||||
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
|
||||||
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
|
if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page')
|
||||||
|
or VidsIoIE.suitable(embed_url)):
|
||||||
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
entries.append(self.url_result(smuggle_url(embed_url, headers)))
|
||||||
|
|
||||||
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
post_file = traverse_obj(attributes, ('post_file', {dict}))
|
||||||
|
@ -427,7 +441,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||||
'title': 'Cognitive Dissonance Podcast',
|
'title': 'Cognitive Dissonance Podcast',
|
||||||
'channel_url': 'https://www.patreon.com/dissonancepod',
|
'channel_url': 'https://www.patreon.com/dissonancepod',
|
||||||
'id': '80642',
|
'id': '80642',
|
||||||
'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7',
|
'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*',
|
||||||
'channel_id': '80642',
|
'channel_id': '80642',
|
||||||
'channel': 'Cognitive Dissonance Podcast',
|
'channel': 'Cognitive Dissonance Podcast',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
@ -445,7 +459,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||||
'id': '4767637',
|
'id': '4767637',
|
||||||
'channel_id': '4767637',
|
'channel_id': '4767637',
|
||||||
'channel_url': 'https://www.patreon.com/notjustbikes',
|
'channel_url': 'https://www.patreon.com/notjustbikes',
|
||||||
'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
|
'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'channel': 'Not Just Bikes',
|
'channel': 'Not Just Bikes',
|
||||||
'uploader_url': 'https://www.patreon.com/notjustbikes',
|
'uploader_url': 'https://www.patreon.com/notjustbikes',
|
||||||
|
@ -462,7 +476,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||||
'id': '4243769',
|
'id': '4243769',
|
||||||
'channel_id': '4243769',
|
'channel_id': '4243769',
|
||||||
'channel_url': 'https://www.patreon.com/secondthought',
|
'channel_url': 'https://www.patreon.com/secondthought',
|
||||||
'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
|
'description': r're:(?s).*Second Thought is an educational YouTube channel.*',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'channel': 'Second Thought',
|
'channel': 'Second Thought',
|
||||||
'uploader_url': 'https://www.patreon.com/secondthought',
|
'uploader_url': 'https://www.patreon.com/secondthought',
|
||||||
|
@ -512,7 +526,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
|
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
|
||||||
if campaign_id is None:
|
if campaign_id is None:
|
||||||
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
|
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
|
||||||
campaign_id = self._search_nextjs_data(
|
campaign_id = self._search_nextjs_data(
|
||||||
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
|
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue