mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 09:17:07 -05:00 
			
		
		
		
	[youtube] Adapt to new automatic captions rendition (closes #13467)
This commit is contained in:
		@@ -1269,37 +1269,57 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
                    sub_lang_list[sub_lang] = sub_formats
 | 
			
		||||
                return sub_lang_list
 | 
			
		||||
 | 
			
		||||
            # Some videos don't provide ttsurl but rather caption_tracks and
 | 
			
		||||
            # caption_translation_languages (e.g. 20LmZk1hakA)
 | 
			
		||||
            caption_tracks = args['caption_tracks']
 | 
			
		||||
            caption_translation_languages = args['caption_translation_languages']
 | 
			
		||||
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
 | 
			
		||||
            parsed_caption_url = compat_urllib_parse_urlparse(caption_url)
 | 
			
		||||
            caption_qs = compat_parse_qs(parsed_caption_url.query)
 | 
			
		||||
 | 
			
		||||
            sub_lang_list = {}
 | 
			
		||||
            for lang in caption_translation_languages.split(','):
 | 
			
		||||
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
 | 
			
		||||
                sub_lang = lang_qs.get('lc', [None])[0]
 | 
			
		||||
                if not sub_lang:
 | 
			
		||||
                    continue
 | 
			
		||||
            def make_captions(sub_url, sub_langs):
 | 
			
		||||
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
 | 
			
		||||
                caption_qs = compat_parse_qs(parsed_sub_url.query)
 | 
			
		||||
                captions = {}
 | 
			
		||||
                for sub_lang in sub_langs:
 | 
			
		||||
                    sub_formats = []
 | 
			
		||||
                    for ext in self._SUBTITLE_FORMATS:
 | 
			
		||||
                        caption_qs.update({
 | 
			
		||||
                            'tlang': [sub_lang],
 | 
			
		||||
                            'fmt': [ext],
 | 
			
		||||
                        })
 | 
			
		||||
                    sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
 | 
			
		||||
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
 | 
			
		||||
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
 | 
			
		||||
                        sub_formats.append({
 | 
			
		||||
                            'url': sub_url,
 | 
			
		||||
                            'ext': ext,
 | 
			
		||||
                        })
 | 
			
		||||
                sub_lang_list[sub_lang] = sub_formats
 | 
			
		||||
            return sub_lang_list
 | 
			
		||||
                    captions[sub_lang] = sub_formats
 | 
			
		||||
                return captions
 | 
			
		||||
 | 
			
		||||
            # New captions format as of 22.06.2017
 | 
			
		||||
            player_response = args.get('player_response')
 | 
			
		||||
            if player_response and isinstance(player_response, compat_str):
 | 
			
		||||
                player_response = self._parse_json(
 | 
			
		||||
                    player_response, video_id, fatal=False)
 | 
			
		||||
                if player_response:
 | 
			
		||||
                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
 | 
			
		||||
                    base_url = renderer['captionTracks'][0]['baseUrl']
 | 
			
		||||
                    sub_lang_list = []
 | 
			
		||||
                    for lang in renderer['translationLanguages']:
 | 
			
		||||
                        lang_code = lang.get('languageCode')
 | 
			
		||||
                        if lang_code:
 | 
			
		||||
                            sub_lang_list.append(lang_code)
 | 
			
		||||
                    return make_captions(base_url, sub_lang_list)
 | 
			
		||||
 | 
			
		||||
            # Some videos don't provide ttsurl but rather caption_tracks and
 | 
			
		||||
            # caption_translation_languages (e.g. 20LmZk1hakA)
 | 
			
		||||
            # No longer used as of 22.06.2017
 | 
			
		||||
            caption_tracks = args['caption_tracks']
 | 
			
		||||
            caption_translation_languages = args['caption_translation_languages']
 | 
			
		||||
            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
 | 
			
		||||
            sub_lang_list = []
 | 
			
		||||
            for lang in caption_translation_languages.split(','):
 | 
			
		||||
                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
 | 
			
		||||
                sub_lang = lang_qs.get('lc', [None])[0]
 | 
			
		||||
                if sub_lang:
 | 
			
		||||
                    sub_lang_list.append(sub_lang)
 | 
			
		||||
            return make_captions(caption_url, sub_lang_list)
 | 
			
		||||
        # An extractor error can be raised by the download process if there are
 | 
			
		||||
        # no automatic captions but there are subtitles
 | 
			
		||||
        except (KeyError, ExtractorError):
 | 
			
		||||
        except (KeyError, IndexError, ExtractorError):
 | 
			
		||||
            self._downloader.report_warning(err_msg)
 | 
			
		||||
            return {}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user