#86 [youtube_live_chat] Use POST API (Closes #82)

YouTube has removed support for the old GET based live chat API, and it's now returning 404

Authored by siikamiika
This commit is contained in:
siikamiika 2021-02-15 11:57:21 +02:00 committed by GitHub
parent 7620cd46c3
commit 273762c8d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 38 deletions

View File

@ -95,11 +95,12 @@ def _write_ytdl_file(self, ctx):
frag_index_stream.write(json.dumps({'downloader': downloader}))
frag_index_stream.close()
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
'request_data': request_data,
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:

View File

@ -27,6 +27,7 @@
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
request_data = info_dict.get('request_data', None)
class DownloadContext(dict):
__getattr__ = dict.get
@ -101,7 +102,7 @@ def establish_connection():
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
request = sanitized_Request(url, None, headers)
request = sanitized_Request(url, request_data, headers)
if has_range:
set_range(request, range_start, range_end)
# Establish connection
@ -152,7 +153,7 @@ def establish_connection():
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
sanitized_Request(url, None, headers))
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:

View File

@ -1,11 +1,13 @@
from __future__ import division, unicode_literals
import re
import json
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import try_get
from ..utils import (
try_get,
RegexNotFoundError,
)
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
@ -27,40 +29,28 @@ def real_download(self, filename, info_dict):
'total_frags': None,
}
def dl_fragment(url):
headers = info_dict.get('http_headers', {})
return self._download_fragment(ctx, url, info_dict, headers)
ie = YT_BaseIE(self.ydl)
def parse_yt_initial_data(data):
patterns = (
r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
data = data.decode('utf-8', 'replace')
for patt in patterns:
try:
raw_json = re.search(patt, data).group(1)
return json.loads(raw_json)
except AttributeError:
continue
def dl_fragment(url, data=None, headers=None):
http_headers = info_dict.get('http_headers', {})
if headers:
http_headers = http_headers.copy()
http_headers.update(headers)
return self._download_fragment(ctx, url, info_dict, http_headers, data)
def download_and_parse_fragment(url, frag_index):
def download_and_parse_fragment(url, frag_index, request_data):
count = 0
while count <= fragment_retries:
try:
success, raw_fragment = dl_fragment(url)
success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
if not success:
return False, None, None
data = parse_yt_initial_data(raw_fragment)
try:
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
data = None
if not data:
raw_data = json.loads(raw_fragment)
# sometimes youtube replies with a list
if not isinstance(raw_data, list):
raw_data = [raw_data]
try:
data = next(item['response'] for item in raw_data if 'response' in item)
except StopIteration:
data = {}
data = json.loads(raw_fragment)
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@ -93,22 +83,37 @@ def download_and_parse_fragment(url, frag_index):
'https://www.youtube.com/watch?v={}'.format(video_id))
if not success:
return False
data = parse_yt_initial_data(raw_fragment)
try:
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
return False
continuation_id = try_get(
data,
lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')
ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
if not ytcfg:
return False
api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
if not api_key or not innertube_context:
return False
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
frag_index = offset = 0
while continuation_id is not None:
frag_index += 1
url = ''.join((
'https://www.youtube.com/live_chat_replay',
'/get_live_chat_replay' if frag_index > 1 else '',
'?continuation=%s' % continuation_id,
'&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else ''))
success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
request_data = {
'context': innertube_context,
'continuation': continuation_id,
}
if frag_index > 1:
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
success, continuation_id, offset = download_and_parse_fragment(
url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
if not success:
return False
if test: