mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 23:07:07 -05:00 
			
		
		
		
	[WDR] extract jsonp-url by parsing data-extension of mediaLink
This commit is contained in:
		@@ -10,6 +10,7 @@ from ..compat import (
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    strip_jsonp,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
@@ -21,8 +22,6 @@ class WDRIE(InfoExtractor):
 | 
			
		||||
    _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
 | 
			
		||||
    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 | 
			
		||||
 | 
			
		||||
    _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
 | 
			
		||||
 | 
			
		||||
    _TESTS = [
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
 | 
			
		||||
@@ -102,9 +101,13 @@ class WDRIE(InfoExtractor):
 | 
			
		||||
        display_id = mobj.group('display_id')
 | 
			
		||||
        webpage = self._download_webpage(url, display_id)
 | 
			
		||||
 | 
			
		||||
        js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None)
 | 
			
		||||
        # for wdr.de the data-extension is in a tag with the class "mediaLink"
 | 
			
		||||
        # for wdrmaus its in a link to the page in a multiline "videoLink"-tag
 | 
			
		||||
        json_metadata = self._html_search_regex(
 | 
			
		||||
            r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
 | 
			
		||||
            webpage, 'media link', default=None, flags=re.MULTILINE)
 | 
			
		||||
 | 
			
		||||
        if not js_url:
 | 
			
		||||
        if not json_metadata:
 | 
			
		||||
            entries = [
 | 
			
		||||
                self.url_result(page_url + href[0], 'WDR')
 | 
			
		||||
                for href in re.findall(
 | 
			
		||||
@@ -117,8 +120,12 @@ class WDRIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
            raise ExtractorError('No downloadable streams found', expected=True)
 | 
			
		||||
 | 
			
		||||
        media_link_obj = self._parse_json(json_metadata, display_id,
 | 
			
		||||
                                          transform_source=js_to_json)
 | 
			
		||||
        jsonp_url = media_link_obj['mediaObj']['url']
 | 
			
		||||
 | 
			
		||||
        metadata = self._download_json(
 | 
			
		||||
            js_url, 'metadata', transform_source=strip_jsonp)
 | 
			
		||||
            jsonp_url, 'metadata', transform_source=strip_jsonp)
 | 
			
		||||
 | 
			
		||||
        metadata_tracker_data = metadata['trackerData']
 | 
			
		||||
        metadata_media_resource = metadata['mediaResource']
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user