mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 07:17:07 -05:00 
			
		
		
		
	[youtube] Add support for downloading top lists (fixes #1868)
The extractor needs both the channel and the title of the list, because the list ids change every time you browse the channels and are tied to a 'VISITOR_INFO1_LIVE' cookie.
This commit is contained in:
		@@ -15,6 +15,7 @@ from youtube_dl.extractor import (
 | 
			
		||||
    YoutubeIE,
 | 
			
		||||
    YoutubeChannelIE,
 | 
			
		||||
    YoutubeShowIE,
 | 
			
		||||
    YoutubeTopListIE,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -116,5 +117,12 @@ class TestYoutubeLists(unittest.TestCase):
 | 
			
		||||
        original_video = entries[0]
 | 
			
		||||
        self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
 | 
			
		||||
 | 
			
		||||
    def test_youtube_toplist(self):
        """Smoke-test the "yttoplist:" keyword against the music channel.

        Extracts the "Top Tracks" list and checks that at least nine
        entries come back.
        """
        extractor = YoutubeTopListIE(FakeYDL())
        playlist = extractor.extract('yttoplist:music:Top Tracks')
        self.assertTrue(len(playlist['entries']) >= 9)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -194,6 +194,7 @@ from .youtube import (
 | 
			
		||||
    YoutubeWatchLaterIE,
 | 
			
		||||
    YoutubeFavouritesIE,
 | 
			
		||||
    YoutubeHistoryIE,
 | 
			
		||||
    YoutubeTopListIE,
 | 
			
		||||
)
 | 
			
		||||
from .zdf import ZDFIE
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
 | 
			
		||||
            # Mixes require a custom extraction process
 | 
			
		||||
            return self._extract_mix(playlist_id)
 | 
			
		||||
        if playlist_id.startswith('TL'):
 | 
			
		||||
            raise ExtractorError(u'For downloading YouTube.com top lists, use '
 | 
			
		||||
                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 | 
			
		||||
 | 
			
		||||
        # Extract the video ids from the playlist pages
 | 
			
		||||
        ids = []
 | 
			
		||||
@@ -1598,6 +1601,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
        return self.playlist_result(url_results, playlist_id, playlist_title)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeTopListIE(YoutubePlaylistIE):
    """Extractor for YouTube top lists, addressed by channel and list title.

    Handles pseudo-URLs of the form ``yttoplist:{channel}:{list title}``.
    The list is looked up by title on the channel page rather than by id,
    because the list ids change every time the channels are browsed and are
    attached to a 'VISITOR_INFO1_LIVE' cookie.
    """
    IE_NAME = u'youtube:toplist'
    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
        u' (Example: "yttoplist:music:Top Tracks")')
    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
    # The list page sometimes comes back without any video; cap how many
    # times it is requested so a layout change cannot cause an endless loop.
    _MAX_LIST_PAGE_ATTEMPTS = 10

    def _real_extract(self, url):
        """Resolve a ``yttoplist:`` pseudo-URL into a playlist result.

        Downloads the channel page, finds the link whose query string
        contains the url-encoded list title, then downloads that page and
        collects the video ids found in it.

        Raises ExtractorError if no video id is found after
        ``_MAX_LIST_PAGE_ATTEMPTS`` downloads of the list page.
        """
        mobj = re.match(self._VALID_URL, url)
        channel = mobj.group('chann')
        title = mobj.group('title')

        # Locate the list on the channel page through its url-encoded title.
        query = compat_urllib_parse.urlencode({'title': title})
        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
        link = self._html_search_regex(playlist_re, channel_page, u'list')
        url = compat_urlparse.urljoin('https://www.youtube.com/', link)

        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
        ids = []
        # sometimes the webpage doesn't contain the videos
        # retry (a bounded number of times) until we get them
        for attempt in range(self._MAX_LIST_PAGE_ATTEMPTS):
            msg = u'Downloading Youtube top list'
            if attempt > 0:
                msg += ', retry #%d' % attempt

            webpage = self._download_webpage(url, title, msg)
            ids = orderedSet(re.findall(video_re, webpage))
            if ids:
                break
        else:
            # Every attempt returned a page without videos: fail loudly
            # instead of requesting the page forever.
            raise ExtractorError(
                u'Unable to find any video in the top list page after %d attempts'
                % self._MAX_LIST_PAGE_ATTEMPTS)
        url_results = self._ids_to_results(ids)
        return self.playlist_result(url_results, playlist_title=title)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class YoutubeChannelIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = u'YouTube.com channels'
 | 
			
		||||
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user