From a95ee6d8803fca9157adecf63732ab58bf87fd88 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:35:18 +0000 Subject: [PATCH] [ie/youtube] Fix `n` sig extraction for player `3bb1f723` (#11750) Closes #11744 Authored by: bashonly --- test/test_youtube_signature.py | 11 +++++++++-- yt_dlp/extractor/youtube.py | 15 +++++++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 56db096ca..2a99436a6 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -188,6 +188,10 @@ 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', ), + ( + 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', + 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', + ), ] @@ -259,8 +263,11 @@ def signature(jscode, sig_input): def n_sig(jscode, sig_input): - funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) - return JSInterpreter(jscode).call_function(funcname, sig_input) + ie = YoutubeIE(FakeYDL()) + funcname = ie._extract_n_function_name(jscode) + jsi = JSInterpreter(jscode) + func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname))) + return func([sig_input]) make_sig_test = t_factory( diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c9b831618..2b026ef05 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3212,6 +3212,7 @@ def _extract_n_function_name(self, jscode, player_url=None): # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("") # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") + # * 
J.J="";J.url="";J.Z&&(R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}}; funcname, idx = self._search_regex( r'''(?x) (?: @@ -3228,7 +3229,7 @@ def _extract_n_function_name(self, jscode, player_url=None): )\)&&\(c=| \b(?P<var>[a-zA-Z0-9_$]+)= )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\) - (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''', + (?(var),[a-zA-Z0-9_$]+\.set\((?:"n+"|[a-zA-Z0-9_$]+)\,(?P=var)\))''', jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None)) if not funcname: self.report_warning(join_nonempty( @@ -3237,7 +3238,7 @@ def _extract_n_function_name(self, jscode, player_url=None): return self._search_regex( r'''(?xs) ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\) - \s*\{(?:(?!};).)+?["']enhanced_except_''', + \s*\{(?:(?!};).)+?return\s*(?P<q>["'])[\w-]+_w8_(?P=q)\s*\+\s*[a-zA-Z0-9_$]+''', jscode, 'Initial JS player n function name', group='name') elif not idx: return funcname @@ -3246,6 +3247,11 @@ def _extract_n_function_name(self, jscode, player_url=None): rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode, f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)] + def _fixup_n_function_code(self, argnames, code): + return argnames, re.sub( + rf';\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(["\'])undefined\1\s*\)\s*return\s+{argnames[0]};', + ';', code) + def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') @@ -3257,7 +3263,8 @@ def _extract_n_function_code(self, video_id, player_url): func_name = self._extract_n_function_name(jscode, player_url=player_url) - func_code = jsi.extract_function_code(func_name) + # XXX: Workaround for the `typeof` gotcha + func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name)) self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code @@ -3273,7 +3280,7 @@ 
def extract_nsig(s): except Exception as e: raise JSInterpreter.Exception(traceback.format_exc(), cause=e) - if ret.startswith('enhanced_except_'): + if ret.startswith('enhanced_except_') or ret.endswith(f'_w8_{s}'): raise JSInterpreter.Exception('Signature function returned an exception') return ret