mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 05:37:07 -05:00 
			
		
		
		
	[youtube] Separate methods for embeds extraction
This commit is contained in:
		@@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
        if vid_me_embed_url is not None:
 | 
					        if vid_me_embed_url is not None:
 | 
				
			||||||
            return self.url_result(vid_me_embed_url, 'Vidme')
 | 
					            return self.url_result(vid_me_embed_url, 'Vidme')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Look for embedded YouTube player
 | 
					        # Look for YouTube embeds
 | 
				
			||||||
        matches = re.findall(r'''(?x)
 | 
					        youtube_urls = YoutubeIE._extract_urls(webpage)
 | 
				
			||||||
            (?:
 | 
					        if youtube_urls:
 | 
				
			||||||
                <iframe[^>]+?src=|
 | 
					 | 
				
			||||||
                data-video-url=|
 | 
					 | 
				
			||||||
                <embed[^>]+?src=|
 | 
					 | 
				
			||||||
                embedSWF\(?:\s*|
 | 
					 | 
				
			||||||
                <object[^>]+data=|
 | 
					 | 
				
			||||||
                new\s+SWFObject\(
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
            (["\'])
 | 
					 | 
				
			||||||
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
 | 
					 | 
				
			||||||
                (?:embed|v|p)/.+?)
 | 
					 | 
				
			||||||
            \1''', webpage)
 | 
					 | 
				
			||||||
        if matches:
 | 
					 | 
				
			||||||
            return self.playlist_from_matches(
 | 
					            return self.playlist_from_matches(
 | 
				
			||||||
                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
 | 
					                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Look for lazyYT YouTube embed
 | 
					 | 
				
			||||||
        matches = re.findall(
 | 
					 | 
				
			||||||
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
 | 
					 | 
				
			||||||
        if matches:
 | 
					 | 
				
			||||||
            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Look for Wordpress "YouTube Video Importer" plugin
 | 
					 | 
				
			||||||
        matches = re.findall(r'''(?x)<div[^>]+
 | 
					 | 
				
			||||||
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
 | 
					 | 
				
			||||||
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
 | 
					 | 
				
			||||||
        if matches:
 | 
					 | 
				
			||||||
            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        matches = DailymotionIE._extract_urls(webpage)
 | 
					        matches = DailymotionIE._extract_urls(webpage)
 | 
				
			||||||
        if matches:
 | 
					        if matches:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			|||||||
            playback_url, video_id, 'Marking watched',
 | 
					            playback_url, video_id, 'Marking watched',
 | 
				
			||||||
            'Unable to mark watched', fatal=False)
 | 
					            'Unable to mark watched', fatal=False)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
					    def _extract_urls(webpage):
					        """Collect the YouTube embeds found in a page's HTML.

					        Three kinds of embed are recognised, and their results are
					        appended to the returned list in this order:

					        * regular player embeds (iframe/embed/object tags,
					          data-video-url attributes, embedSWF/SWFObject calls):
					          the HTML-unescaped embed URL;
					        * lazyYT placeholders: the HTML-unescaped data-youtube-id value;
					        * Wordpress "YouTube Video Importer" plugin containers:
					          the raw data-video_id value.

					        Returns a (possibly empty) list of strings.
					        """
					        # Embedded YouTube player.
					        # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`,
					        # which requires a literal ':' after an optional parenthesis and
					        # therefore never matched a normal `embedSWF("...")` call.
					        entries = [
					            unescapeHTML(mobj.group('url'))
					            for mobj in re.finditer(r'''(?x)
					            (?:
					                <iframe[^>]+?src=|
					                data-video-url=|
					                <embed[^>]+?src=|
					                embedSWF\(\s*|
					                <object[^>]+data=|
					                new\s+SWFObject\(
					            )
					            (["\'])
					                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
					                (?:embed|v|p)/.+?)
					            \1''', webpage)]

					        # lazyYT YouTube embed
					        entries.extend(
					            unescapeHTML(youtube_id)
					            for youtube_id in re.findall(
					                r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

					        # Wordpress "YouTube Video Importer" plugin
					        matches = re.findall(r'''(?x)<div[^>]+
					            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
					            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
					        # findall yields (q1, q2, video_id) tuples; the id is the last group.
					        entries.extend(m[-1] for m in matches)

					        return entries
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
					    def _extract_url(webpage):
					        """Return the first YouTube embed found in webpage, or None.

					        Delegates to YoutubeIE._extract_urls and keeps only its first
					        result.
					        """
					        found = YoutubeIE._extract_urls(webpage)
					        if not found:
					            return None
					        return found[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def extract_id(cls, url):
 | 
					    def extract_id(cls, url):
 | 
				
			||||||
        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
 | 
					        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user