mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 01:57:08 -05:00 
			
		
		
		
	[wsj:article] Add extractor
This commit is contained in:
		
				
					committed by
					
						
						Sergey M․
					
				
			
			
				
	
			
			
			
						parent
						
							0254f93b08
						
					
				
				
					commit
					3266d08af2
				
			@@ -1233,7 +1233,10 @@ from .wrzuta import (
 | 
			
		||||
    WrzutaIE,
 | 
			
		||||
    WrzutaPlaylistIE,
 | 
			
		||||
)
 | 
			
		||||
from .wsj import WSJIE
 | 
			
		||||
from .wsj import (
 | 
			
		||||
    WSJIE,
 | 
			
		||||
    WSJArticleIE,
 | 
			
		||||
)
 | 
			
		||||
from .xbef import XBefIE
 | 
			
		||||
from .xboxclips import XboxClipsIE
 | 
			
		||||
from .xfileshare import XFileShareIE
 | 
			
		||||
 
 | 
			
		||||
@@ -10,10 +10,11 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WSJIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'''(?x)https?://
 | 
			
		||||
    _VALID_URL = r'''(?x)
 | 
			
		||||
        (?:
 | 
			
		||||
            video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
 | 
			
		||||
            (?:www\.)?wsj\.com/video/[^/]+/
 | 
			
		||||
            https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
 | 
			
		||||
            https?://(?:www\.)?wsj\.com/video/[^/]+/|
 | 
			
		||||
            wsj:
 | 
			
		||||
        )
 | 
			
		||||
        (?P<id>[a-zA-Z0-9-]+)'''
 | 
			
		||||
    IE_DESC = 'Wall Street Journal'
 | 
			
		||||
@@ -87,3 +88,24 @@ class WSJIE(InfoExtractor):
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'categories': info.get('keywords'),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WSJArticleIE(InfoExtractor):
    """Extractor for wsj.com article pages.

    The article page itself is not the media: this extractor downloads the
    page, pulls a video id out of a ``data-src`` attribute and hands the
    actual extraction over to ``WSJIE`` through a ``wsj:<video id>`` URL.
    """

    # The id group stops at '?', '#' and '&' so that a query string or
    # fragment (e.g. the trailing '?' in the test URL below) does not end
    # up as part of the article id.
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
        'info_dict': {
            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
            'ext': 'mp4',
            'upload_date': '20170221',
            'uploader_id': 'ralcaraz',
            'title': 'Bao Bao the Panda Leaves for China',
        }
    }]

    def _real_extract(self, url):
        """Resolve an article URL to a ``wsj:`` URL result.

        :param url: article URL matching ``_VALID_URL``.
        :returns: the value of ``self.url_result(...)`` pointing at
            ``wsj:<video id>`` with ``WSJIE`` as the target extractor key.
        """
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)
        # The video id is taken from the first data-src="..." (or
        # data-src='...') attribute whose value starts with uppercase
        # letters, digits or dashes.
        video_id = self._search_regex(
            r'data-src=["\']([A-Z0-9\-]+)', webpage, 'video id')
        return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user