mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-10-03 23:43:21 -04:00
[youtube] Fix throttling by decrypting n-sig (#1437)
This commit is contained in:
@@ -1720,7 +1720,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
return id_m.group('id')
|
||||
|
||||
def _load_player(self, video_id, player_url, fatal=True) -> bool:
|
||||
def _load_player(self, video_id, player_url, fatal=True):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
if player_id not in self._code_cache:
|
||||
code = self._download_webpage(
|
||||
@@ -1729,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
errnote='Download of %s failed' % player_url)
|
||||
if code:
|
||||
self._code_cache[player_id] = code
|
||||
return player_id in self._code_cache
|
||||
return self._code_cache.get(player_id)
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
@@ -1743,8 +1743,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if cache_spec is not None:
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
|
||||
if self._load_player(video_id, player_url):
|
||||
code = self._code_cache[player_id]
|
||||
code = self._load_player(video_id, player_url)
|
||||
if code:
|
||||
res = self._parse_sig_js(code)
|
||||
|
||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||
@@ -1755,6 +1755,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return res
|
||||
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
if not self.get_param('youtube_print_sig_code'):
|
||||
return
|
||||
|
||||
def gen_sig_code(idxs):
|
||||
def _genslice(start, end, step):
|
||||
starts = '' if start == 0 else str(start)
|
||||
@@ -1831,13 +1834,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
)
|
||||
self._player_cache[player_id] = func
|
||||
func = self._player_cache[player_id]
|
||||
if self.get_param('youtube_print_sig_code'):
|
||||
self._print_sig_code(func, s)
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
raise ExtractorError(
|
||||
'Signature extraction failed: ' + tb, cause=e)
|
||||
raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
|
||||
|
||||
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Two kinds of entries are kept in self._player_cache: the n function
    obtained for a player, keyed by ('nsig', player_url), and each decrypted
    value, keyed by ('nsig_value', s).

    Raises ExtractorError when player_url is None, or (carrying the formatted
    traceback) when obtaining or running the n function fails.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Make the player URL absolute before it is used as part of a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)

    cache = self._player_cache
    value_key = ('nsig_value', s)
    if value_key in cache:
        # This exact n value has been decrypted before
        return cache[value_key]

    try:
        func_key = ('nsig', player_url)
        if func_key not in cache:
            cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypted = cache[func_key](s)
        cache[value_key] = decrypted
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        # Any failure is reported as an ExtractorError with the original as its cause
        raise ExtractorError(traceback.format_exc(), cause=e)
|
||||
|
||||
def _extract_n_function_name(self, jscode):
    """Return the name of the function the player JS applies to the n parameter

    The name is taken from a call site of the form
    `.get("n"))&&(b=NAME(x)` in jscode, using self._search_regex with the
    'nfunc' group.
    """
    return self._search_regex(
        # The length of the minified identifier is not fixed, so accept any
        # non-empty run of identifier characters instead of exactly three
        (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)\([a-zA-Z0-9]\)',),
        jscode, 'Initial JS player n function name', group='nfunc')
|
||||
|
||||
def _extract_n_function(self, video_id, player_url):
    """Return a callable that runs the player's n function on a string

    The function code is looked up in the downloader cache under
    'youtube-nsig' / player id. On a miss it is extracted from the player JS
    and stored there.
    """
    player_id = self._extract_player_info(player_url)
    cache = self._downloader.cache
    func_code = cache.load('youtube-nsig', player_id)

    if not func_code:
        # Nothing cached: fetch the player, find the function and cache its code
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        # NOTE(review): func_code[1] is presumably the function body - confirm against JSInterpreter
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The interpreter function is rebuilt from func_code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
|
||||
|
||||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||||
"""
|
||||
@@ -1856,9 +1904,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError(error_msg)
|
||||
self.report_warning(error_msg)
|
||||
return
|
||||
if self._load_player(video_id, player_url, fatal=fatal):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
code = self._code_cache[player_id]
|
||||
code = self._load_player(video_id, player_url, fatal=fatal)
|
||||
if code:
|
||||
sts = int_or_none(self._search_regex(
|
||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
|
||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||
@@ -2440,6 +2487,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
|
||||
fmt_url += '&' + sp + '=' + signature
|
||||
|
||||
query = parse_qs(fmt_url)
|
||||
throttled = False
|
||||
if query.get('ratebypass') != ['yes'] and query.get('n'):
|
||||
try:
|
||||
fmt_url = update_url_query(fmt_url, {
|
||||
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
|
||||
except ExtractorError as e:
|
||||
self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
|
||||
throttled = True
|
||||
|
||||
if itag:
|
||||
itags.append(itag)
|
||||
stream_ids.append(stream_id)
|
||||
@@ -2453,7 +2510,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'format_note': ', '.join(filter(None, (
|
||||
'%s%s' % (audio_track.get('displayName') or '',
|
||||
' (default)' if audio_track.get('audioIsDefault') else ''),
|
||||
fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
|
||||
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
|
||||
throttled and 'THROTTLED'))),
|
||||
'source_preference': -10 if not throttled else -1,
|
||||
'fps': int_or_none(fmt.get('fps')),
|
||||
'height': height,
|
||||
'quality': q(quality),
|
||||
@@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if reason:
|
||||
self.raise_no_formats(reason, expected=True)
|
||||
|
||||
for f in formats:
|
||||
if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
|
||||
f['source_preference'] = -10
|
||||
# TODO: this method is not reliable
|
||||
f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
|
||||
|
||||
# Source is given priority since formats that throttle are given lower source_preference
|
||||
# When throttling issue is fully fixed, remove this
|
||||
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))
|
||||
|
Reference in New Issue
Block a user