0
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-26 02:55:17 +00:00

#86 [youtube_live_chat] Use POST API (Closes #82)

YouTube has removed support for the old GET-based live chat API, which now returns HTTP 404.

Authored by siikamiika
This commit is contained in:
siikamiika 2021-02-15 11:57:21 +02:00 committed by GitHub
parent 7620cd46c3
commit 273762c8d0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 38 deletions

View file

@ -95,11 +95,12 @@ def _write_ytdl_file(self, ctx):
frag_index_stream.write(json.dumps({'downloader': downloader})) frag_index_stream.write(json.dumps({'downloader': downloader}))
frag_index_stream.close() frag_index_stream.close()
def _download_fragment(self, ctx, frag_url, info_dict, headers=None): def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = { fragment_info_dict = {
'url': frag_url, 'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'), 'http_headers': headers or info_dict.get('http_headers'),
'request_data': request_data,
} }
success = ctx['dl'].download(fragment_filename, fragment_info_dict) success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success: if not success:

View file

@ -27,6 +27,7 @@
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
request_data = info_dict.get('request_data', None)
class DownloadContext(dict): class DownloadContext(dict):
__getattr__ = dict.get __getattr__ = dict.get
@ -101,7 +102,7 @@ def establish_connection():
range_end = ctx.data_len - 1 range_end = ctx.data_len - 1
has_range = range_start is not None has_range = range_start is not None
ctx.has_range = has_range ctx.has_range = has_range
request = sanitized_Request(url, None, headers) request = sanitized_Request(url, request_data, headers)
if has_range: if has_range:
set_range(request, range_start, range_end) set_range(request, range_start, range_end)
# Establish connection # Establish connection
@ -152,7 +153,7 @@ def establish_connection():
try: try:
# Open the connection again without the range header # Open the connection again without the range header
ctx.data = self.ydl.urlopen( ctx.data = self.ydl.urlopen(
sanitized_Request(url, None, headers)) sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length'] content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err: except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600: if err.code < 500 or err.code >= 600:

View file

@ -1,11 +1,13 @@
from __future__ import division, unicode_literals from __future__ import division, unicode_literals
import re
import json import json
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import try_get from ..utils import (
try_get,
RegexNotFoundError,
)
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
@ -27,40 +29,28 @@ def real_download(self, filename, info_dict):
'total_frags': None, 'total_frags': None,
} }
def dl_fragment(url): ie = YT_BaseIE(self.ydl)
headers = info_dict.get('http_headers', {})
return self._download_fragment(ctx, url, info_dict, headers)
def parse_yt_initial_data(data): def dl_fragment(url, data=None, headers=None):
patterns = ( http_headers = info_dict.get('http_headers', {})
r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE), if headers:
r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE) http_headers = http_headers.copy()
data = data.decode('utf-8', 'replace') http_headers.update(headers)
for patt in patterns: return self._download_fragment(ctx, url, info_dict, http_headers, data)
try:
raw_json = re.search(patt, data).group(1)
return json.loads(raw_json)
except AttributeError:
continue
def download_and_parse_fragment(url, frag_index): def download_and_parse_fragment(url, frag_index, request_data):
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
success, raw_fragment = dl_fragment(url) success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
if not success: if not success:
return False, None, None return False, None, None
data = parse_yt_initial_data(raw_fragment)
if not data:
raw_data = json.loads(raw_fragment)
# sometimes youtube replies with a list
if not isinstance(raw_data, list):
raw_data = [raw_data]
try: try:
data = next(item['response'] for item in raw_data if 'response' in item) data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except StopIteration: except RegexNotFoundError:
data = {} data = None
if not data:
data = json.loads(raw_fragment)
live_chat_continuation = try_get( live_chat_continuation = try_get(
data, data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@ -93,22 +83,37 @@ def download_and_parse_fragment(url, frag_index):
'https://www.youtube.com/watch?v={}'.format(video_id)) 'https://www.youtube.com/watch?v={}'.format(video_id))
if not success: if not success:
return False return False
data = parse_yt_initial_data(raw_fragment) try:
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
return False
continuation_id = try_get( continuation_id = try_get(
data, data,
lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
# no data yet but required to call _append_fragment # no data yet but required to call _append_fragment
self._append_fragment(ctx, b'') self._append_fragment(ctx, b'')
ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
if not ytcfg:
return False
api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
if not api_key or not innertube_context:
return False
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
frag_index = offset = 0 frag_index = offset = 0
while continuation_id is not None: while continuation_id is not None:
frag_index += 1 frag_index += 1
url = ''.join(( request_data = {
'https://www.youtube.com/live_chat_replay', 'context': innertube_context,
'/get_live_chat_replay' if frag_index > 1 else '', 'continuation': continuation_id,
'?continuation=%s' % continuation_id, }
'&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else '')) if frag_index > 1:
success, continuation_id, offset = download_and_parse_fragment(url, frag_index) request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
success, continuation_id, offset = download_and_parse_fragment(
url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
if not success: if not success:
return False return False
if test: if test: