mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 00:27:07 -05:00 
			
		
		
		
	[wsj] Improve and modernize (closes #12558)
This commit is contained in:
		@@ -11,12 +11,13 @@ from ..utils import (
 | 
			
		||||
 | 
			
		||||
class WSJIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'''(?x)
 | 
			
		||||
        (?:
 | 
			
		||||
            https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
 | 
			
		||||
            https?://(?:www\.)?wsj\.com/video/[^/]+/|
 | 
			
		||||
            wsj:
 | 
			
		||||
        )
 | 
			
		||||
        (?P<id>[a-zA-Z0-9-]+)'''
 | 
			
		||||
                        (?:
 | 
			
		||||
                            https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
 | 
			
		||||
                            https?://(?:www\.)?wsj\.com/video/[^/]+/|
 | 
			
		||||
                            wsj:
 | 
			
		||||
                        )
 | 
			
		||||
                        (?P<id>[a-fA-F0-9-]{36})
 | 
			
		||||
                    '''
 | 
			
		||||
    IE_DESC = 'Wall Street Journal'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
 | 
			
		||||
@@ -39,12 +40,17 @@ class WSJIE(InfoExtractor):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
 | 
			
		||||
        api_url = (
 | 
			
		||||
            'http://video-api.wsj.com/api-video/find_all_videos.asp?'
 | 
			
		||||
            'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
 | 
			
		||||
            'thumbnailList,author,description,name,duration,videoURL,'
 | 
			
		||||
            'titletag,formattedCreationDate,keywords,editor' % video_id)
 | 
			
		||||
        info = self._download_json(api_url, video_id)['items'][0]
 | 
			
		||||
        info = self._download_json(
 | 
			
		||||
            'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
 | 
			
		||||
            query={
 | 
			
		||||
                'type': 'guid',
 | 
			
		||||
                'count': 1,
 | 
			
		||||
                'query': video_id,
 | 
			
		||||
                'fields': ','.join((
 | 
			
		||||
                    'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
 | 
			
		||||
                    'description', 'name', 'duration', 'videoURL', 'titletag',
 | 
			
		||||
                    'formattedCreationDate', 'keywords', 'editor')),
 | 
			
		||||
            })['items'][0]
 | 
			
		||||
        title = info.get('name', info.get('titletag'))
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
@@ -91,8 +97,8 @@ class WSJIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class WSJArticleIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>\w[^/]+)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
 | 
			
		||||
@@ -101,11 +107,11 @@ class WSJArticleIE(InfoExtractor):
 | 
			
		||||
            'uploader_id': 'ralcaraz',
 | 
			
		||||
            'title': 'Bao Bao the Panda Leaves for China',
 | 
			
		||||
        }
 | 
			
		||||
    }]
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        article_id = self._match_id(url)
 | 
			
		||||
        webpage = self._download_webpage(url, article_id)
 | 
			
		||||
        video_id = self._search_regex(r'data-src=["\']([A-Z0-9\-]+)',
 | 
			
		||||
                                      webpage, 'video id')
 | 
			
		||||
        video_id = self._search_regex(
 | 
			
		||||
            r'data-src=["\']([a-fA-F0-9-]{36})', webpage, 'video id')
 | 
			
		||||
        return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user