1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-09-20 01:48:42 -04:00

Updated to release 2020.11.21.1

This commit is contained in:
pukkandan 2020-11-21 20:20:42 +05:30
parent 3462ffa892
commit a0566bbf5c
29 changed files with 559 additions and 360 deletions

View File

@ -37,7 +37,7 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false, "listsubtitles": false,
"socket_timeout": 20, "socket_timeout": 20,
"fixup": "never" "fixup": "never"
} }

View File

@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx') self.assertEqual(downloaded['extractor_key'], 'TestEx')
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
    # A playlist whose entries come from a generator of url_transparent
    # results must keep going past failing entries when 'ignoreerrors' is
    # set: the failed entries show up as None and only the good one is
    # downloaded.

    class _QuietYDL(YDL):
        # Silence error reporting so that failing entries do not abort
        # the test run.
        def trouble(self, s, tb=None):
            pass

    ydl = _QuietYDL({
        'format': 'extra',
        'ignoreerrors': True,
    })

    class VideoIE(InfoExtractor):
        _VALID_URL = r'video:(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            # Video 0 fails outright during extraction.
            if video_id == '0':
                raise ExtractorError('foo')
            formats = [{
                'format_id': 'default',
                'url': 'url:',
            }]
            # Only video 2 offers the 'extra' format requested above.
            if video_id == '2':
                formats.append({
                    'format_id': 'extra',
                    'url': TEST_URL,
                })
            return {
                'id': video_id,
                'title': 'Video %s' % video_id,
                'formats': formats,
            }

    class PlaylistIE(InfoExtractor):
        _VALID_URL = r'playlist:'

        def _entries(self):
            # Deliberately a generator: the test targets lazily produced
            # (iterable) playlist entries.
            for video_id in map(compat_str, range(3)):
                yield {
                    '_type': 'url_transparent',
                    'ie_key': VideoIE.ie_key(),
                    'id': video_id,
                    'url': 'video:%s' % video_id,
                    'title': 'Video Transparent %s' % video_id,
                }

        def _real_extract(self, url):
            return self.playlist_result(self._entries())

    for extractor_cls in (VideoIE, PlaylistIE):
        ydl.add_info_extractor(extractor_cls(ydl))

    info = ydl.extract_info('playlist:')
    entries = info['entries']
    self.assertEqual(len(entries), 3)
    # The first two entries are expected to have failed and been skipped.
    for failed_index in (0, 1):
        self.assertTrue(entries[failed_index] is None)
    self.assertEqual(len(ydl.downloaded_info_dicts), 1)

    downloaded = ydl.downloaded_info_dicts[0]
    self.assertEqual(entries[2], downloaded)
    self.assertEqual(downloaded['url'], TEST_URL)
    # The title comes from the url_transparent entry, not from VideoIE.
    self.assertEqual(downloaded['title'], 'Video Transparent 2')
    self.assertEqual(downloaded['id'], '2')
    self.assertEqual(downloaded['extractor'], 'Video')
    self.assertEqual(downloaded['extractor_key'], 'Video')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -830,34 +830,23 @@ class YoutubeDL(object):
'and will probably not work.') 'and will probably not work.')
try: try:
try: temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url) except (AssertionError, IndexError, AttributeError):
except (AssertionError, IndexError, AttributeError): temp_id = None
temp_id = None if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): self.to_screen("[%s] %s: has already been recorded in archive" % (
self.to_screen("[%s] %s: has already been recorded in archive" % ( ie_key, temp_id))
ie_key, temp_id)) break
break
ie_result = ie.extract(url) return self.__extract_info(url, ie, download, extra_info, process, info_dict)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break else:
if isinstance(ie_result, list): self.report_error('no suitable InfoExtractor for URL %s' % url)
# Backwards compatibility: old IE result format
ie_result = { def __handle_extraction_exceptions(func):
'_type': 'compat_list', def wrapper(self, *args, **kwargs):
'entries': ie_result, try:
} return func(self, *args, **kwargs)
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except GeoRestrictedError as e: except GeoRestrictedError as e:
msg = e.msg msg = e.msg
if e.countries: if e.countries:
@ -865,20 +854,38 @@ class YoutubeDL(object):
map(ISO3166Utils.short2full, e.countries)) map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg) self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached: except MaxDownloadsReached:
raise raise
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else: else:
raise raise
return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
if info_dict:
if info_dict.get('id'):
ie_result['id'] = info_dict['id']
if info_dict.get('title'):
ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else: else:
self.report_error('no suitable InfoExtractor for URL %s' % url) return ie_result
def add_default_extra_info(self, ie_result, ie, url): def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, { self.add_extra_info(ie_result, {
@ -1057,9 +1064,8 @@ class YoutubeDL(object):
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
continue continue
entry_result = self.process_ie_result(entry, entry_result = self.__process_iterable_entry(entry, download, extra)
download=download, # TODO: skip failed (empty) entries?
extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist) self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@ -1088,6 +1094,11 @@ class YoutubeDL(object):
else: else:
raise Exception('Invalid result type: %s' % result_type) raise Exception('Invalid result type: %s' % result_type)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry via process_ie_result().

    The call is routed through the __handle_extraction_exceptions
    decorator so that each entry is handled by that wrapper on its own.
    """
    entry_result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return entry_result
def _build_format_filter(self, filter_spec): def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec " " Returns a function to filter the formats according to the filter_spec "

View File

@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in # HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass

View File

@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err: except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout): # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err) raise RetryDownload(err)
raise err raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked

View File

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)
class AmaraIE(InfoExtractor):
    # Extractor for amara.org video pages. Metadata and subtitle listings
    # come from the JSON API at https://amara.org/api/videos/<id>/; when the
    # underlying media URL is suitable for YoutubeIE or VimeoIE the result is
    # handed over to that extractor as a url_transparent entry.
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        meta = self._download_json(
            'https://amara.org/api/videos/%s/' % video_id,
            video_id, query={'format': 'json'})

        # Both fields are mandatory; a missing one aborts the extraction.
        title = meta['title']
        video_url = meta['all_urls'][0]

        # One subtitle entry per supported format for every published
        # language that exposes a subtitles URI.
        subtitles = {}
        for lang in meta.get('languages') or []:
            uri = lang.get('subtitles_uri')
            if uri and lang.get('published'):
                tracks = subtitles.setdefault(lang.get('code') or 'en', [])
                tracks.extend(
                    {
                        'ext': sub_format,
                        'url': update_url_query(uri, {'format': sub_format}),
                    }
                    for sub_format in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # Delegate to the first embed extractor that accepts the media URL;
        # otherwise the URL is returned as-is.
        embed_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if embed_ie is not None:
            info.update({
                '_type': 'url_transparent',
                'ie_key': embed_ie.ie_key(),
            })

        return info

View File

@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
] ]
@classmethod @classmethod
def _build_brighcove_url(cls, object_str): def _build_brightcove_url(cls, object_str):
""" """
Build a Brightcove url from a xml string containing Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object> <object class="BrightcoveExperience">{params}</object>
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return cls._make_brightcove_url(params) return cls._make_brightcove_url(params)
@classmethod @classmethod
def _build_brighcove_url_from_js(cls, object_js): def _build_brightcove_url_from_js(cls, object_js):
# The layout of JS is as follows: # The layout of JS is as follows:
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
# // build Brightcove <object /> XML # // build Brightcove <object /> XML
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
).+?>\s*</object>''', ).+?>\s*</object>''',
webpage) webpage)
if matches: if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches: if matches:
return list(filter(None, [ return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc) cls._build_brightcove_url_from_js(custom_bc)
for custom_bc in matches])) for custom_bc in matches]))
return [src for _, src in re.findall( return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]

View File

@ -1664,7 +1664,7 @@ class InfoExtractor(object):
# just the media without qualities renditions. # just the media without qualities renditions.
# Fortunately, master playlist can be easily distinguished from media # Fortunately, master playlist can be easily distinguished from media
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
# master playlist tags MUST NOT appear in a media playist and vice versa. # master playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
# media playlist and MUST NOT appear in master playlist thus we can # media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion. # clearly detect media playlist with this criterion.

View File

@ -7,7 +7,7 @@ from .dplay import DPlayIE
class DiscoveryNetworksDeIE(DPlayIE): class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
}, { }, {
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
title = get_item('title', preferred_langs) or video_id title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs) description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnmail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View File

@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .alura import ( from .alura import (
AluraIE, AluraIE,
AluraCourseIE AluraCourseIE
@ -1507,7 +1508,6 @@ from .youtube import (
YoutubeIE, YoutubeIE,
YoutubeFavouritesIE, YoutubeFavouritesIE,
YoutubeHistoryIE, YoutubeHistoryIE,
YoutubeLiveIE,
YoutubeTabIE, YoutubeTabIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,

View File

@ -211,7 +211,7 @@ class FranceTVIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': self._live_title(title) if is_live else title, 'title': self._live_title(title) if is_live else title,
'description': clean_html(info.get('synopsis')), 'description': clean_html(info.get('synopsis')),
'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')), 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live, 'is_live': is_live,

View File

@ -842,7 +842,7 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# MTVSercices embed # MTVServices embed
{ {
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'md5': 'ca1aef97695ef2c1d6973256a57e5252',

View File

@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get,
update_url_query, update_url_query,
) )
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
# video can't be watched anonymously due to view count limit reached, # video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e', 'only_matching': True,
'info_dict': {
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
'ext': 'mp4',
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
}
}, { }, {
# video id is longer than 28 characters # video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
'info_dict': {
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
'ext': 'mp4',
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
'duration': 189,
},
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( video_info = compat_parse_qs(self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id) 'https://drive.google.com/get_video_info',
video_id, query={'docid': video_id}))
title = self._search_regex( def get_value(key):
r'"title"\s*,\s*"([^"]+)', webpage, 'title', return try_get(video_info, lambda x: x[key][0])
default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex( reason = get_value('reason')
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', title = get_value('title')
default=None)) if not title and reason:
raise ExtractorError(reason, expected=True)
formats = [] formats = []
fmt_stream_map = self._search_regex( fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, fmt_list = (get_value('fmt_list') or '').split(',')
'fmt stream map', default='').split(',')
fmt_list = self._search_regex(
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
'fmt_list', default='').split(',')
if fmt_stream_map and fmt_list: if fmt_stream_map and fmt_list:
resolutions = {} resolutions = {}
for fmt in fmt_list: for fmt in fmt_list:
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
if not formats: if not formats and reason:
reason = self._search_regex( raise ExtractorError(reason, expected=True)
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason, expected=True)
self._sort_formats(formats) self._sort_formats(formats)
hl = self._search_regex( hl = get_value('hl')
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
subtitles_id = None subtitles_id = None
ttsurl = self._search_regex( ttsurl = get_value('ttsurl')
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
if ttsurl: if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl # the video Id for subtitles will be the last value in the ttsurl
# query string # query string
@ -281,8 +265,8 @@ class GoogleDriveIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None), 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
'duration': duration, 'duration': int_or_none(get_value('length_seconds')),
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
'automatic_captions': self.extract_automatic_captions( 'automatic_captions': self.extract_automatic_captions(

View File

@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
def _extract_rtmp_video(self, webpage): def _extract_rtmp_video(self, webpage):
# The server URL is hardcoded # The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/' video_url = 'rtmpe://videof.infoq.com/cfx/st/'
# Extract video URL # Extract video URL
encoded_id = self._search_regex( encoded_id = self._search_regex(
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
return [{ return [{
'format_id': 'http_video', 'format_id': 'http_video',
'url': http_video_url, 'url': http_video_url,
'http_headers': {'Referer': 'https://www.infoq.com/'},
}] }]
def _extract_http_audio(self, webpage, video_id): def _extract_http_audio(self, webpage, video_id):
fields = self._hidden_inputs(webpage) fields = self._form_hidden_inputs('mp3Form', webpage)
http_audio_url = fields.get('filename') http_audio_url = fields.get('filename')
if not http_audio_url: if not http_audio_url:
return [] return []
# base URL is found in the Location header in the response returned by # base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in. # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link # audio file seem to be missing some times even if there is a download link

View File

@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
description = xpath_text(doc, 'ABSTRACT') description = xpath_text(doc, 'ABSTRACT')
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
formats = [] formats = []
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': createtion_time, 'timestamp': creation_time,
} }

View File

@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
}, },
}], }],
}, { }, {
# mutlimedia, not media title # multimedia, not media title
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
'info_dict': { 'info_dict': {
'id': '533198237', 'id': '533198237',

View File

@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
if media_id: if media_id:
return media_id, presumptive_id, upload_date, description return media_id, presumptive_id, upload_date, description
# Fronline video embedded via flp # Frontline video embedded via flp
video_id = self._search_regex( video_id = self._search_regex(
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
if video_id: if video_id:

View File

@ -16,8 +16,9 @@ from ..utils import (
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
parse_duration, parse_duration,
remove_start,
strip_or_none, strip_or_none,
unescapeHTML, try_get,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
# This does not imply geo restriction (e.g. # This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4': if '/video_no_available.mp4' in media_url:
continue continue
ext = determine_ext(media_url) ext = determine_ext(media_url)
@ -122,27 +123,8 @@ class RaiBaseIE(InfoExtractor):
class RaiPlayIE(RaiBaseIE): class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
'md5': '340aa3b7afb54bfd14a8c11786450d76',
'info_dict': {
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
'ext': 'mp4',
'title': 'La Casa Bianca',
'alt_title': 'S2016 - Puntata del 23/10/2016',
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 3',
'creator': 'Rai 3',
'duration': 3278,
'timestamp': 1477764300,
'upload_date': '20161029',
'series': 'La Casa Bianca',
'season': '2016',
},
'skip': 'This content is not available',
}, {
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': { 'info_dict': {
@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
url, video_id = re.match(self._VALID_URL, url).groups() base, video_id = re.match(self._VALID_URL, url).groups()
media = self._download_json( media = self._download_json(
url.replace('.html', '.json'), video_id, 'Downloading video JSON') base + '.json', video_id, 'Downloading video JSON')
title = media['name'] title = media['name']
video = media['video'] video = media['video']
@ -195,7 +177,8 @@ class RaiPlayIE(RaiBaseIE):
season = media.get('season') season = media.get('season')
info = { info = {
'id': video_id, 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': self._live_title(title) if relinker_info.get( 'title': self._live_title(title) if relinker_info.get(
'is_live') else title, 'is_live') else title,
'alt_title': strip_or_none(media.get('subtitle')), 'alt_title': strip_or_none(media.get('subtitle')),
@ -217,16 +200,16 @@ class RaiPlayIE(RaiBaseIE):
return info return info
class RaiPlayLiveIE(RaiBaseIE): class RaiPlayLiveIE(RaiPlayIE):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
_TEST = { _TESTS = [{
'url': 'http://www.raiplay.it/dirette/rainews24', 'url': 'http://www.raiplay.it/dirette/rainews24',
'info_dict': { 'info_dict': {
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
'display_id': 'rainews24', 'display_id': 'rainews24',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:6eca31500550f9376819f174e5644754', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
'uploader': 'Rai News 24', 'uploader': 'Rai News 24',
'creator': 'Rai News 24', 'creator': 'Rai News 24',
'is_live': True, 'is_live': True,
@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} }]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
webpage, 'content id')
return {
'_type': 'url_transparent',
'ie_key': RaiPlayIE.ie_key(),
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
'id': video_id,
'display_id': display_id,
}
class RaiPlayPlaylistIE(InfoExtractor): class RaiPlayPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
'info_dict': { 'info_dict': {
'id': 'nondirloalmiocapo', 'id': 'nondirloalmiocapo',
'title': 'Non dirlo al mio capo', 'title': 'Non dirlo al mio capo',
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) base, playlist_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, playlist_id) program = self._download_json(
base + '.json', playlist_id, 'Downloading program JSON')
title = self._html_search_meta(
('programma', 'nomeProgramma'), webpage, 'title')
description = unescapeHTML(self._html_search_meta(
('description', 'og:description'), webpage, 'description'))
entries = [] entries = []
for mobj in re.finditer( for b in (program.get('blocks') or []):
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', for s in (b.get('sets') or []):
webpage): s_id = s.get('id')
video_url = urljoin(url, mobj.group('path')) if not s_id:
entries.append(self.url_result( continue
video_url, ie=RaiPlayIE.ie_key(), medias = self._download_json(
video_id=RaiPlayIE._match_id(video_url))) '%s/%s.json' % (base, s_id), s_id,
'Downloading content set JSON', fatal=False)
if not medias:
continue
for m in (medias.get('items') or []):
path_id = m.get('path_id')
if not path_id:
continue
video_url = urljoin(url, path_id)
entries.append(self.url_result(
video_url, ie=RaiPlayIE.ie_key(),
video_id=RaiPlayIE._match_id(video_url)))
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(
entries, playlist_id, program.get('name'),
try_get(program, lambda x: x['program_info']['description']))
class RaiIE(RaiBaseIE): class RaiIE(RaiBaseIE):
@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE):
'duration': 2214, 'duration': 2214,
'upload_date': '20161103', 'upload_date': '20161103',
} }
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
'skip': 'This content is not available',
}, { }, {
# initEdizione('ContentItem-...' # initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE):
'upload_date': '20170401', 'upload_date': '20170401',
}, },
'skip': 'Changes daily', 'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
'skip': 'This content is available only in Italy',
}, { }, {
# HLS live stream with ContentItem in og:url # HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
@ -473,7 +423,7 @@ class RaiIE(RaiBaseIE):
except ExtractorError: except ExtractorError:
pass pass
relinker_url = self._search_regex( relinker_url = self._proto_relative_url(self._search_regex(
r'''(?x) r'''(?x)
(?: (?:
var\s+videoURL| var\s+videoURL|
@ -485,7 +435,7 @@ class RaiIE(RaiBaseIE):
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''', ''',
webpage, 'relinker URL', group='url') webpage, 'relinker URL', group='url'))
relinker_info = self._extract_relinker_info( relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id) urljoin(url, relinker_url), video_id)

View File

@ -649,7 +649,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated # https://developers.soundcloud.com/blog/offset-pagination-deprecated
COMMON_QUERY = { COMMON_QUERY = {
'limit': 200, 'limit': 200,

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
unified_timestamp,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
'format_id': player_type, 'format_id': player_type,
'url': vurl, 'url': vurl,
}) })
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): rights = try_get(video_info, lambda x: x['rights'], dict) or {}
if not formats and rights.get('geoBlockedSweden'):
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is only available in Sweden', 'This video is only available in Sweden',
countries=self._GEO_COUNTRIES) countries=self._GEO_COUNTRIES)
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
episode = video_info.get('episodeTitle') episode = video_info.get('episodeTitle')
episode_number = int_or_none(video_info.get('episodeNumber')) episode_number = int_or_none(video_info.get('episodeNumber'))
timestamp = unified_timestamp(rights.get('validFrom'))
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
age_limit = None age_limit = None
adult = dict_get( adult = dict_get(
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': duration, 'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit, 'age_limit': age_limit,
'series': series, 'series': series,
'season_number': season_number, 'season_number': season_number,
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv' IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
svt:(?P<svt_id>[^/?#&]+)| (?:
svt:|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
)
(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'md5': '2382036fd6f8c994856c323fe51c426e',
'info_dict': { 'info_dict': {
'id': '5996901', 'id': 'jNwpV9P',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Flygplan till Haile Selassie', 'title': 'Det här är himlen',
'duration': 3527, 'timestamp': 1586044800,
'thumbnail': r're:^https?://.*[\.-]jpg$', 'upload_date': '20200405',
'duration': 3515,
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
'age_limit': 0, 'age_limit': 0,
'subtitles': { 'subtitles': {
'sv': [{ 'sv': [{
'ext': 'wsrt', 'ext': 'vtt',
}] }]
}, },
}, },
'params': {
'format': 'bestvideo',
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
# init segments that are smaller
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
'skip_download': True,
},
}, { }, {
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
}, { }, {
'url': 'svt:14278044', 'url': 'svt:14278044',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
'only_matching': True,
}, {
'url': 'svt:eWv5MLX',
'only_matching': True,
}] }]
def _adjust_title(self, info): def _adjust_title(self, info):
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
webpage, 'video id') webpage, 'video id')
return self._extract_by_video_id(svt_id, webpage) info_dict = self._extract_by_video_id(svt_id, webpage)
info_dict['thumbnail'] = thumbnail
return info_dict
class SVTSeriesIE(SVTPlayBaseIE): class SVTSeriesIE(SVTPlayBaseIE):
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = re.match(self._VALID_URL, url).groups()

View File

@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
# return self._extract_via_api(kind, video_id) # return self._extract_via_api(kind, video_id)
# JSON api does not provide some audio formats (e.g. ogg) thus # JSON api does not provide some audio formats (e.g. ogg) thus
# extractiong audio via webpage # extracting audio via webpage
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

View File

@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m: if m:
return [m.group('url')] return [m.group('url')]
# Are whitesapces ignored in URLs? # Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044 # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall( matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)

View File

@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
content_id = xpath_text(video_data, 'contentId') or video_id content_id = xpath_text(video_data, 'contentId') or video_id
# rtmp_src = xpath_text(video_data, 'akamai/src') # rtmp_src = xpath_text(video_data, 'akamai/src')
# if rtmp_src: # if rtmp_src:
# splited_rtmp_src = rtmp_src.split(',') # split_rtmp_src = rtmp_src.split(',')
# if len(splited_rtmp_src) == 2: # if len(split_rtmp_src) == 2:
# rtmp_src = splited_rtmp_src[1] # rtmp_src = split_rtmp_src[1]
# aifp = xpath_text(video_data, 'akamai/aifp', default='') # aifp = xpath_text(video_data, 'akamai/aifp', default='')
urls = [] urls = []

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import hashlib import hashlib
import hmac import hmac
import itertools import itertools
@ -9,6 +10,10 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'md5': '94e0e34fd58f169f40c184f232356cfe',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boys Over Flowers - Episode 1', 'title': 'Boys Over Flowers - Episode 1',
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
'duration': 4204, 'duration': 4172,
'timestamp': 1270496524, 'timestamp': 1270496524,
'upload_date': '20100405', 'upload_date': '20100405',
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# youtube external # youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE):
'uploader_id': 'ad14065n', 'uploader_id': 'ad14065n',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'skip': 'Page not found!',
}, { }, {
'url': 'http://www.viki.com/player/44699v', 'url': 'http://www.viki.com/player/44699v',
'only_matching': True, 'only_matching': True,
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE):
def _real_extract(self, url):