Mirror of https://gitlab.com/ytdl-org/youtube-dl.git (synced 2025-11-04 04:57:07 -05:00)
			
		
		
		
	[generic] Add support for multiple brightcove URLs (Fixes #2283)
This commit is contained in:
		@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
 | 
				
			|||||||
    KhanAcademyIE,
 | 
					    KhanAcademyIE,
 | 
				
			||||||
    EveryonesMixtapeIE,
 | 
					    EveryonesMixtapeIE,
 | 
				
			||||||
    RutubeChannelIE,
 | 
					    RutubeChannelIE,
 | 
				
			||||||
 | 
					    GenericIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
 | 
				
			|||||||
        self.assertEqual(result['id'], '1409')
 | 
					        self.assertEqual(result['id'], '1409')
 | 
				
			||||||
        self.assertTrue(len(result['entries']) >= 34)
 | 
					        self.assertTrue(len(result['entries']) >= 34)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_multiple_brightcove_videos(self):
 | 
				
			||||||
 | 
					        # https://github.com/rg3/youtube-dl/issues/2283
 | 
				
			||||||
 | 
					        dl = FakeYDL()
 | 
				
			||||||
 | 
					        ie = GenericIE(dl)
 | 
				
			||||||
 | 
					        result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
 | 
				
			||||||
 | 
					        self.assertIsPlaylist(result)
 | 
				
			||||||
 | 
					        self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
 | 
				
			||||||
 | 
					        self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
 | 
				
			||||||
 | 
					        self.assertEqual(len(result['entries']), 3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def _extract_brightcove_url(cls, webpage):
 | 
					    def _extract_brightcove_url(cls, webpage):
 | 
				
			||||||
        """Try to extract the brightcove url from the wepbage, returns None
 | 
					        """Try to extract the brightcove url from the webpage, returns None
 | 
				
			||||||
        if it can't be found
 | 
					        if it can't be found
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
 | 
					        urls = cls._extract_brightcove_urls(webpage)
 | 
				
			||||||
 | 
					        return urls[0] if urls else None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def _extract_brightcove_urls(cls, webpage):
 | 
				
			||||||
 | 
					        """Return a list of all Brightcove URLs from the webpage """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
 | 
					        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
 | 
				
			||||||
        if url_m:
 | 
					        if url_m:
 | 
				
			||||||
            return url_m.group(1)
 | 
					            return [url_m.group(1)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        m_brightcove = re.search(
 | 
					        matches = re.findall(
 | 
				
			||||||
            r'''(?sx)<object
 | 
					            r'''(?sx)<object
 | 
				
			||||||
            (?:
 | 
					            (?:
 | 
				
			||||||
                [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
 | 
					                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
 | 
				
			||||||
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
 | 
					                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
 | 
				
			||||||
            ).+?</object>''',
 | 
					            ).+?</object>''',
 | 
				
			||||||
            webpage)
 | 
					            webpage)
 | 
				
			||||||
        if m_brightcove is not None:
 | 
					        return [cls._build_brighcove_url(m) for m in matches]
 | 
				
			||||||
            return cls._build_brighcove_url(m_brightcove.group())
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _real_extract(self, url):
 | 
					    def _real_extract(self, url):
 | 
				
			||||||
        url, smuggled_data = unsmuggle_url(url, {})
 | 
					        url, smuggled_data = unsmuggle_url(url, {})
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 | 
					            r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Look for BrightCove:
 | 
					        # Look for BrightCove:
 | 
				
			||||||
        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
 | 
					        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
 | 
				
			||||||
        if bc_url is not None:
 | 
					        if bc_urls:
 | 
				
			||||||
            self.to_screen('Brightcove video detected.')
 | 
					            self.to_screen('Brightcove video detected.')
 | 
				
			||||||
            surl = smuggle_url(bc_url, {'Referer': url})
 | 
					            entries = [{
 | 
				
			||||||
            return self.url_result(surl, 'Brightcove')
 | 
					                '_type': 'url',
 | 
				
			||||||
 | 
					                'url': smuggle_url(bc_url, {'Referer': url}),
 | 
				
			||||||
 | 
					                'ie_key': 'Brightcove'
 | 
				
			||||||
 | 
					            } for bc_url in bc_urls]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            return {
 | 
				
			||||||
 | 
					                '_type': 'playlist',
 | 
				
			||||||
 | 
					                'title': video_title,
 | 
				
			||||||
 | 
					                'id': video_id,
 | 
				
			||||||
 | 
					                'entries': entries,
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Look for embedded (iframe) Vimeo player
 | 
					        # Look for embedded (iframe) Vimeo player
 | 
				
			||||||
        mobj = re.search(
 | 
					        mobj = re.search(
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user