mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 00:57:07 -05:00 
			
		
		
		
	[youtube] Correct invalid JSON (Fixes #2353)
This commit is contained in:
		@@ -271,8 +271,11 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
    def _download_json(self, url_or_request, video_id,
 | 
			
		||||
                       note=u'Downloading JSON metadata',
 | 
			
		||||
                       errnote=u'Unable to download JSON metadata'):
 | 
			
		||||
                       errnote=u'Unable to download JSON metadata',
 | 
			
		||||
                       transform_source=None):
 | 
			
		||||
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
 | 
			
		||||
        if transform_source:
 | 
			
		||||
            json_string = transform_source(json_string)
 | 
			
		||||
        try:
 | 
			
		||||
            return json.loads(json_string)
 | 
			
		||||
        except ValueError as ve:
 | 
			
		||||
 
 | 
			
		||||
@@ -34,6 +34,7 @@ from ..utils import (
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    write_json_file,
 | 
			
		||||
    uppercase_escape,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class YoutubeBaseInfoExtractor(InfoExtractor):
 | 
			
		||||
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
 | 
			
		||||
            # Download all channel pages using the json-based channel_ajax query
 | 
			
		||||
            for pagenum in itertools.count(1):
 | 
			
		||||
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
 | 
			
		||||
                page = self._download_webpage(url, channel_id,
 | 
			
		||||
                                              u'Downloading page #%s' % pagenum)
 | 
			
		||||
    
 | 
			
		||||
                page = json.loads(page)
 | 
			
		||||
    
 | 
			
		||||
                page = self._download_json(
 | 
			
		||||
                    url, channel_id, note=u'Downloading page #%s' % pagenum,
 | 
			
		||||
                    transform_source=uppercase_escape)
 | 
			
		||||
 | 
			
		||||
                ids_in_page = self.extract_videos_from_page(page['content_html'])
 | 
			
		||||
                video_ids.extend(ids_in_page)
 | 
			
		||||
    
 | 
			
		||||
 
 | 
			
		||||
@@ -1214,3 +1214,9 @@ class PagedList(object):
 | 
			
		||||
            if end == nextfirstid:
 | 
			
		||||
                break
 | 
			
		||||
        return res
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def uppercase_escape(s):
    r"""Replace every ``\UXXXXXXXX`` escape in *s* with the character it names.

    Only the upper-case ``\U`` form followed by exactly eight hexadecimal
    digits is touched; all other text in *s* is returned unchanged.

    The escape is decoded with the ``unicode_escape`` codec instead of
    converting the parsed number with ``chr``/``unichr``, so code points above
    U+FFFF do not depend on the interpreter's ``unichr`` range.  An escape
    whose value is not a valid code point (above U+10FFFF) is left in the
    string as-is instead of raising.

    :param s: text that may contain ``\UXXXXXXXX`` escapes
    :returns: *s* with each decodable escape replaced by its character
    """
    # Local import keeps this helper independent of the module's import list.
    import codecs

    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        escape = match.group(0)
        try:
            # The decoder returns (text, consumed_length); only the text matters.
            return decode_escape(escape)[0]
        except UnicodeDecodeError:
            # Not a real code point: keep the raw escape so whatever consumes
            # the result can reject it, rather than failing here.
            return escape

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user