1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-10-03 23:43:21 -04:00

[youtube] Fix throttling by decrypting n-sig (#1437)

This commit is contained in:
pukkandan
2021-10-31 09:53:58 +05:30
committed by GitHub
parent cd9ea4104b
commit 404f611f1c
5 changed files with 547 additions and 147 deletions

View File

@@ -1720,7 +1720,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
def _load_player(self, video_id, player_url, fatal=True) -> bool:
def _load_player(self, video_id, player_url, fatal=True):
player_id = self._extract_player_info(player_url)
if player_id not in self._code_cache:
code = self._download_webpage(
@@ -1729,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
errnote='Download of %s failed' % player_url)
if code:
self._code_cache[player_id] = code
return player_id in self._code_cache
return self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig):
player_id = self._extract_player_info(player_url)
@@ -1743,8 +1743,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
if self._load_player(video_id, player_url):
code = self._code_cache[player_id]
code = self._load_player(video_id, player_url)
if code:
res = self._parse_sig_js(code)
test_string = ''.join(map(compat_chr, range(len(example_sig))))
@@ -1755,6 +1755,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return res
def _print_sig_code(self, func, example_sig):
if not self.get_param('youtube_print_sig_code'):
return
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = '' if start == 0 else str(start)
@@ -1831,13 +1834,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self.get_param('youtube_print_sig_code'):
self._print_sig_code(func, s)
self._print_sig_code(func, s)
return func(s)
except Exception as e:
tb = traceback.format_exc()
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
def _decrypt_nsig(self, s, video_id, player_url):
    """Decrypt the "n" query value *s* using the n-function of the given player.

    @param s           the encrypted n value taken from a format URL
    @param video_id    passed through to the player download/extraction helpers
    @param player_url  absolute, protocol-relative or site-relative player URL
    @returns           the decrypted value, as produced by the extracted function
    @raises ExtractorError  if player_url is None, or if extracting or running
                            the function fails (the message is the traceback)
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')

    # Bring the player URL into absolute form before using it as a cache key
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    # Decrypted values are memoised per input string.
    # NOTE(review): this key does not include player_url, so a value decrypted
    # with one player is reused for any other player - confirm this is intended.
    value_key = ('nsig_value', s)
    if value_key in self._player_cache:
        return self._player_cache[value_key]

    try:
        # The extracted function itself is memoised per (normalised) player URL
        func_key = ('nsig', player_url)
        if func_key not in self._player_cache:
            self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
        decrypt = self._player_cache[func_key]

        self._player_cache[value_key] = decrypt(s)
        decrypted = self._player_cache[value_key]
        self.write_debug(f'Decrypted nsig {s} => {decrypted}')
        return decrypted
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the full traceback
        raise ExtractorError(traceback.format_exc(), cause=e)
def _extract_n_function_name(self, jscode):
    """Search the player JS for the n-function name.

    The name is the "nfunc" group: the 3-character identifier that is called
    and assigned to `b` right after a `.get("n"))&&(` expression in *jscode*.
    """
    name_patterns = (
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
    )
    return self._search_regex(
        name_patterns, jscode,
        'Initial JS player n function name', group='nfunc')
def _extract_n_function(self, video_id, player_url):
    """Build a callable that applies the player's n-function to a string.

    The function code is looked up in the 'youtube-nsig' section of the
    downloader cache under the player id. On a miss, the player JS is loaded,
    the function is located by name, and its code is stored in that cache.

    @returns  a one-argument callable; each call extracts the function from
              the code again and invokes it with the argument wrapped in a list
    """
    player_id = self._extract_player_info(player_url)

    func_code = self._downloader.cache.load('youtube-nsig', player_id)
    if not func_code:
        # Cache miss: pull the function code out of the full player JS
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        # Cache hit: the interpreter is constructed from the cached value itself
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
@@ -1856,9 +1904,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
if self._load_player(video_id, player_url, fatal=fatal):
player_id = self._extract_player_info(player_url)
code = self._code_cache[player_id]
code = self._load_player(video_id, player_url, fatal=fatal)
if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
@@ -2440,6 +2487,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
fmt_url += '&' + sp + '=' + signature
query = parse_qs(fmt_url)
throttled = False
if query.get('ratebypass') != ['yes'] and query.get('n'):
try:
fmt_url = update_url_query(fmt_url, {
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
except ExtractorError as e:
self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
throttled = True
if itag:
itags.append(itag)
stream_ids.append(stream_id)
@@ -2453,7 +2510,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format_note': ', '.join(filter(None, (
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if audio_track.get('audioIsDefault') else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED'))),
'source_preference': -10 if not throttled else -1,
'fps': int_or_none(fmt.get('fps')),
'height': height,
'quality': q(quality),
@@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if reason:
self.raise_no_formats(reason, expected=True)
for f in formats:
if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
f['source_preference'] = -10
# TODO: this method is not reliable
f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
# Source is given priority since formats that throttle are given lower source_preference
# When throttling issue is fully fixed, remove this
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))