mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 22:37:07 -05:00 
			
		
		
		
	@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import compat_str
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
@@ -375,6 +376,35 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
            },
 | 
			
		||||
            'expected_warnings': ['HTTP Error 403: Forbidden'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '3007193718',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
 | 
			
		||||
                'description': 'md5:37efbac85e0c09b009586523ec143652',
 | 
			
		||||
                'duration': 6292,
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
            'expected_warnings': ['HTTP Error 403: Forbidden'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
 | 
			
		||||
            'info_dict': {
 | 
			
		||||
                'id': '3011407934',
 | 
			
		||||
                'ext': 'mp4',
 | 
			
		||||
                'title': 'Stories from the Stage - Road Trip',
 | 
			
		||||
                'duration': 1619,
 | 
			
		||||
                'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
 | 
			
		||||
            },
 | 
			
		||||
            'params': {
 | 
			
		||||
                'skip_download': True,
 | 
			
		||||
            },
 | 
			
		||||
            'expected_warnings': ['HTTP Error 403: Forbidden'],
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
 | 
			
		||||
            'only_matching': True,
 | 
			
		||||
@@ -438,6 +468,7 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
                r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>',  # jwplayer
 | 
			
		||||
                r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
 | 
			
		||||
                r'<div[^>]+\bdata-cove-id=["\'](\d+)"',  # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
 | 
			
		||||
                r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)',  # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
 | 
			
		||||
            ]
 | 
			
		||||
 | 
			
		||||
            media_id = self._search_regex(
 | 
			
		||||
@@ -472,7 +503,8 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
            if not url:
 | 
			
		||||
                url = self._og_search_url(webpage)
 | 
			
		||||
 | 
			
		||||
            mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
            mobj = re.match(
 | 
			
		||||
                self._VALID_URL, self._proto_relative_url(url.strip()))
 | 
			
		||||
 | 
			
		||||
        player_id = mobj.group('player_id')
 | 
			
		||||
        if not display_id:
 | 
			
		||||
@@ -482,13 +514,27 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
                url, display_id, note='Downloading player page',
 | 
			
		||||
                errnote='Could not download player page')
 | 
			
		||||
            video_id = self._search_regex(
 | 
			
		||||
                r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
 | 
			
		||||
                r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
 | 
			
		||||
                default=None)
 | 
			
		||||
            if not video_id:
 | 
			
		||||
                video_info = self._extract_video_data(
 | 
			
		||||
                    player_page, 'video data', display_id)
 | 
			
		||||
                video_id = compat_str(
 | 
			
		||||
                    video_info.get('id') or video_info['contentID'])
 | 
			
		||||
        else:
 | 
			
		||||
            video_id = mobj.group('id')
 | 
			
		||||
            display_id = video_id
 | 
			
		||||
 | 
			
		||||
        return video_id, display_id, None, description
 | 
			
		||||
 | 
			
		||||
    def _extract_video_data(self, string, name, video_id, fatal=True):
        """Locate and parse the player's embedded video data object.

        ``string`` is searched for a ``PBS.videoData = {...};`` assignment
        first and a ``window.videoBridge = {...};`` assignment second.  The
        captured text is converted with ``js_to_json`` and handed to
        ``_parse_json``.

        The search is given ``'{}'`` as its default, so that literal is what
        gets parsed when it yields nothing else.  ``name`` is passed to the
        search as the description of what is being looked for, ``video_id``
        is passed to the JSON parser, and ``fatal`` is forwarded to the JSON
        parser only.
        """
        video_data_patterns = [
            r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
            r'window\.videoBridge\s*=\s*({.+?});',
        ]
        raw_video_data = self._search_regex(
            video_data_patterns, string, name, default='{}')
        return self._parse_json(
            raw_video_data, video_id,
            transform_source=js_to_json, fatal=fatal)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id, display_id, upload_date, description = self._extract_webpage(url)
 | 
			
		||||
 | 
			
		||||
@@ -519,11 +565,8 @@ class PBSIE(InfoExtractor):
 | 
			
		||||
                'http://player.pbs.org/%s/%s' % (page, video_id),
 | 
			
		||||
                display_id, 'Downloading %s page' % page, fatal=False)
 | 
			
		||||
            if player:
 | 
			
		||||
                video_info = self._parse_json(
 | 
			
		||||
                    self._search_regex(
 | 
			
		||||
                        [r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'],
 | 
			
		||||
                        player, '%s video data' % page, default='{}'),
 | 
			
		||||
                    display_id, transform_source=js_to_json, fatal=False)
 | 
			
		||||
                video_info = self._extract_video_data(
 | 
			
		||||
                    player, '%s video data' % page, display_id, fatal=False)
 | 
			
		||||
                if video_info:
 | 
			
		||||
                    extract_redirect_urls(video_info)
 | 
			
		||||
                    if not info:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user