mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 08:17:08 -05:00 
			
		
		
		
	[itv] Make SOAP request non-fatal and extract metadata from a webpage (closes #16780)
This commit is contained in:
		@@ -18,6 +18,7 @@ from ..utils import (
 | 
			
		||||
    xpath_element,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    merge_dicts,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
@@ -129,64 +130,65 @@ class ITVIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        resp_env = self._download_xml(
 | 
			
		||||
            params['data-playlist-url'], video_id,
 | 
			
		||||
            headers=headers, data=etree.tostring(req_env))
 | 
			
		||||
        playlist = xpath_element(resp_env, './/Playlist')
 | 
			
		||||
        if playlist is None:
 | 
			
		||||
            fault_code = xpath_text(resp_env, './/faultcode')
 | 
			
		||||
            fault_string = xpath_text(resp_env, './/faultstring')
 | 
			
		||||
            if fault_code == 'InvalidGeoRegion':
 | 
			
		||||
                self.raise_geo_restricted(
 | 
			
		||||
                    msg=fault_string, countries=self._GEO_COUNTRIES)
 | 
			
		||||
            elif fault_code not in (
 | 
			
		||||
                    'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
 | 
			
		||||
                raise ExtractorError(
 | 
			
		||||
                    '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
 | 
			
		||||
            info.update({
 | 
			
		||||
                'title': self._og_search_title(webpage),
 | 
			
		||||
                'episode_title': params.get('data-video-episode'),
 | 
			
		||||
                'series': params.get('data-video-title'),
 | 
			
		||||
            })
 | 
			
		||||
        else:
 | 
			
		||||
            title = xpath_text(playlist, 'EpisodeTitle', default=None)
 | 
			
		||||
            info.update({
 | 
			
		||||
                'title': title,
 | 
			
		||||
                'episode_title': title,
 | 
			
		||||
                'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
 | 
			
		||||
                'series': xpath_text(playlist, 'ProgrammeTitle'),
 | 
			
		||||
                'duration': parse_duration(xpath_text(playlist, 'Duration')),
 | 
			
		||||
            })
 | 
			
		||||
            video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
 | 
			
		||||
            media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
 | 
			
		||||
            rtmp_url = media_files.attrib['base']
 | 
			
		||||
            headers=headers, data=etree.tostring(req_env), fatal=False)
 | 
			
		||||
        if resp_env:
 | 
			
		||||
            playlist = xpath_element(resp_env, './/Playlist')
 | 
			
		||||
            if playlist is None:
 | 
			
		||||
                fault_code = xpath_text(resp_env, './/faultcode')
 | 
			
		||||
                fault_string = xpath_text(resp_env, './/faultstring')
 | 
			
		||||
                if fault_code == 'InvalidGeoRegion':
 | 
			
		||||
                    self.raise_geo_restricted(
 | 
			
		||||
                        msg=fault_string, countries=self._GEO_COUNTRIES)
 | 
			
		||||
                elif fault_code not in (
 | 
			
		||||
                        'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
 | 
			
		||||
                    raise ExtractorError(
 | 
			
		||||
                        '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
 | 
			
		||||
                info.update({
 | 
			
		||||
                    'title': self._og_search_title(webpage),
 | 
			
		||||
                    'episode_title': params.get('data-video-episode'),
 | 
			
		||||
                    'series': params.get('data-video-title'),
 | 
			
		||||
                })
 | 
			
		||||
            else:
 | 
			
		||||
                title = xpath_text(playlist, 'EpisodeTitle', default=None)
 | 
			
		||||
                info.update({
 | 
			
		||||
                    'title': title,
 | 
			
		||||
                    'episode_title': title,
 | 
			
		||||
                    'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
 | 
			
		||||
                    'series': xpath_text(playlist, 'ProgrammeTitle'),
 | 
			
		||||
                    'duration': parse_duration(xpath_text(playlist, 'Duration')),
 | 
			
		||||
                })
 | 
			
		||||
                video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
 | 
			
		||||
                media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
 | 
			
		||||
                rtmp_url = media_files.attrib['base']
 | 
			
		||||
 | 
			
		||||
            for media_file in media_files.findall('MediaFile'):
 | 
			
		||||
                play_path = xpath_text(media_file, 'URL')
 | 
			
		||||
                if not play_path:
 | 
			
		||||
                    continue
 | 
			
		||||
                tbr = int_or_none(media_file.get('bitrate'), 1000)
 | 
			
		||||
                f = {
 | 
			
		||||
                    'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
 | 
			
		||||
                    'play_path': play_path,
 | 
			
		||||
                    # Providing this swfVfy allows to avoid truncated downloads
 | 
			
		||||
                    'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
 | 
			
		||||
                    'page_url': url,
 | 
			
		||||
                    'tbr': tbr,
 | 
			
		||||
                    'ext': 'flv',
 | 
			
		||||
                }
 | 
			
		||||
                app = self._search_regex(
 | 
			
		||||
                    'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
 | 
			
		||||
                if app:
 | 
			
		||||
                    f.update({
 | 
			
		||||
                        'url': rtmp_url.split('?', 1)[0],
 | 
			
		||||
                        'app': app,
 | 
			
		||||
                    })
 | 
			
		||||
                else:
 | 
			
		||||
                    f['url'] = rtmp_url
 | 
			
		||||
                formats.append(f)
 | 
			
		||||
                for media_file in media_files.findall('MediaFile'):
 | 
			
		||||
                    play_path = xpath_text(media_file, 'URL')
 | 
			
		||||
                    if not play_path:
 | 
			
		||||
                        continue
 | 
			
		||||
                    tbr = int_or_none(media_file.get('bitrate'), 1000)
 | 
			
		||||
                    f = {
 | 
			
		||||
                        'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
 | 
			
		||||
                        'play_path': play_path,
 | 
			
		||||
                        # Providing this swfVfy allows to avoid truncated downloads
 | 
			
		||||
                        'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
 | 
			
		||||
                        'page_url': url,
 | 
			
		||||
                        'tbr': tbr,
 | 
			
		||||
                        'ext': 'flv',
 | 
			
		||||
                    }
 | 
			
		||||
                    app = self._search_regex(
 | 
			
		||||
                        'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
 | 
			
		||||
                    if app:
 | 
			
		||||
                        f.update({
 | 
			
		||||
                            'url': rtmp_url.split('?', 1)[0],
 | 
			
		||||
                            'app': app,
 | 
			
		||||
                        })
 | 
			
		||||
                    else:
 | 
			
		||||
                        f['url'] = rtmp_url
 | 
			
		||||
                    formats.append(f)
 | 
			
		||||
 | 
			
		||||
            for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
 | 
			
		||||
                if caption_url.text:
 | 
			
		||||
                    extract_subtitle(caption_url.text)
 | 
			
		||||
                for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
 | 
			
		||||
                    if caption_url.text:
 | 
			
		||||
                        extract_subtitle(caption_url.text)
 | 
			
		||||
 | 
			
		||||
        ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
 | 
			
		||||
        hmac = params.get('data-video-hmac')
 | 
			
		||||
@@ -261,7 +263,17 @@ class ITVIE(InfoExtractor):
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'subtitles': subtitles,
 | 
			
		||||
        })
 | 
			
		||||
        return info
 | 
			
		||||
 | 
			
		||||
        webpage_info = self._search_json_ld(webpage, video_id, default={})
 | 
			
		||||
        if not webpage_info.get('title'):
 | 
			
		||||
            webpage_info['title'] = self._html_search_regex(
 | 
			
		||||
                r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
 | 
			
		||||
                webpage, 'title', default=None) or self._og_search_title(
 | 
			
		||||
                webpage, default=None) or self._html_search_meta(
 | 
			
		||||
                'twitter:title', webpage, 'title',
 | 
			
		||||
                default=None) or webpage_info['episode']
 | 
			
		||||
 | 
			
		||||
        return merge_dicts(info, webpage_info)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ITVBTCCIE(InfoExtractor):
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user