mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-10-03 23:43:21 -04:00
Update to ytdl-2021.02.10
Except: [archiveorg] Fix and improve extraction (5fc53690cbe6abb11941a3f4846b566a7472753e)
This commit is contained in:
@@ -32,7 +32,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
# qualities,
|
||||
# qualities, # TODO: Enable this after fixing formatSort
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
@@ -414,7 +414,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?(1).+)? # if we found the ID, everything can follow
|
||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_PLAYER_INFO_RE = (
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.js$',
|
||||
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
|
||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
|
||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
|
||||
)
|
||||
_formats = {
|
||||
@@ -621,6 +622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'AfrojackVEVO',
|
||||
'uploader_id': 'AfrojackVEVO',
|
||||
'upload_date': '20131011',
|
||||
'abr': 129.495,
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
@@ -1134,10 +1136,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Age-gated video only available with authentication (unavailable
|
||||
# via embed page workaround)
|
||||
'url': 'XgnwCQzjau8',
|
||||
'only_matching': True,
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/28094
|
||||
'url': 'OtqTfy26tG0',
|
||||
'info_dict': {
|
||||
'id': 'OtqTfy26tG0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Burn Out',
|
||||
'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
|
||||
'upload_date': '20141120',
|
||||
'uploader': 'The Cinematic Orchestra - Topic',
|
||||
'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
|
||||
'artist': 'The Cinematic Orchestra',
|
||||
'track': 'Burn Out',
|
||||
'album': 'Every Day',
|
||||
'release_data': None,
|
||||
'release_year': None,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@@ -1230,6 +1248,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
|
||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
# Obsolete patterns
|
||||
@@ -1493,7 +1514,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
formats = []
|
||||
itags = []
|
||||
itag_qualities = {}
|
||||
player_url = None
|
||||
# TODO: Enable this after fixing formatSort
|
||||
# q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
|
||||
streaming_data = player_response.get('streamingData') or {}
|
||||
streaming_formats = streaming_data.get('formats') or []
|
||||
@@ -1502,6 +1525,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
|
||||
continue
|
||||
|
||||
itag = str_or_none(fmt.get('itag'))
|
||||
quality = fmt.get('quality')
|
||||
if itag and quality:
|
||||
itag_qualities[itag] = quality
|
||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
||||
# number of fragments that would subsequently be requested (with `&sq=N`)
|
||||
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
|
||||
continue
|
||||
|
||||
fmt_url = fmt.get('url')
|
||||
if not fmt_url:
|
||||
sc = compat_parse_qs(fmt.get('signatureCipher'))
|
||||
@@ -1521,10 +1554,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
|
||||
fmt_url += '&' + sp + '=' + signature
|
||||
|
||||
itag = str_or_none(fmt.get('itag'))
|
||||
if itag:
|
||||
itags.append(itag)
|
||||
quality = fmt.get('quality')
|
||||
tbr = float_or_none(
|
||||
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||
dct = {
|
||||
'asr': int_or_none(fmt.get('audioSampleRate')),
|
||||
'filesize': int_or_none(fmt.get('contentLength')),
|
||||
@@ -1532,9 +1565,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'format_note': fmt.get('qualityLabel') or quality,
|
||||
'fps': int_or_none(fmt.get('fps')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
# 'quality': q(quality), # This does not correctly reflect the overall quality of the format
|
||||
'tbr': float_or_none(fmt.get(
|
||||
'averageBitrate') or fmt.get('bitrate'), 1000),
|
||||
# 'quality': q(quality), # TODO: Enable this after fixing formatSort
|
||||
'tbr': tbr,
|
||||
'url': fmt_url,
|
||||
'width': fmt.get('width'),
|
||||
}
|
||||
@@ -1545,7 +1577,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if mobj:
|
||||
dct['ext'] = mimetype2ext(mobj.group(1))
|
||||
dct.update(parse_codecs(mobj.group(2)))
|
||||
if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
|
||||
no_audio = dct.get('acodec') == 'none'
|
||||
no_video = dct.get('vcodec') == 'none'
|
||||
if no_audio:
|
||||
dct['vbr'] = tbr
|
||||
if no_video:
|
||||
dct['abr'] = tbr
|
||||
if no_audio or no_video:
|
||||
dct['downloader_options'] = {
|
||||
# Youtube throttles chunks >~10M
|
||||
'http_chunk_size': 10485760,
|
||||
@@ -1565,22 +1603,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if self._downloader.params.get('youtube_include_dash_manifest'):
|
||||
dash_manifest_url = streaming_data.get('dashManifestUrl')
|
||||
if dash_manifest_url:
|
||||
dash_formats = []
|
||||
for f in self._extract_mpd_formats(
|
||||
dash_manifest_url, video_id, fatal=False):
|
||||
itag = f['format_id']
|
||||
if itag in itags:
|
||||
continue
|
||||
# if itag in itag_qualities: # TODO: Enable this after fixing formatSort
|
||||
# f['quality'] = q(itag_qualities[itag])
|
||||
filesize = int_or_none(self._search_regex(
|
||||
r'/clen/(\d+)', f.get('fragment_base_url')
|
||||
or f['url'], 'file size', default=None))
|
||||
if filesize:
|
||||
f['filesize'] = filesize
|
||||
dash_formats.append(f)
|
||||
# Until further investigation prefer DASH formats as non-DASH
|
||||
# may not be available (see [1])
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/28070
|
||||
if dash_formats:
|
||||
dash_formats_keys = [f['format_id'] for f in dash_formats]
|
||||
formats = [f for f in formats if f['format_id'] not in dash_formats_keys]
|
||||
formats.extend(dash_formats)
|
||||
formats.append(f)
|
||||
|
||||
if not formats:
|
||||
if streaming_data.get('licenseInfos'):
|
||||
@@ -1747,7 +1782,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
|
||||
'track': mobj.group('track').strip(),
|
||||
'release_date': release_date,
|
||||
'release_year': int(release_year),
|
||||
'release_year': int_or_none(release_year),
|
||||
})
|
||||
|
||||
initial_data = None
|
||||
@@ -2597,9 +2632,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
next_continuation = cls._extract_next_continuation_data(renderer)
|
||||
if next_continuation:
|
||||
return next_continuation
|
||||
contents = renderer.get('contents') or renderer.get('items')
|
||||
if not isinstance(contents, list):
|
||||
return
|
||||
contents = []
|
||||
for key in ('contents', 'items'):
|
||||
contents.extend(try_get(renderer, lambda x: x[key], list) or [])
|
||||
for content in contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
|
Reference in New Issue
Block a user