Updated to release 2020.11.21.1

2025-09-20 01:48:42 -04:00 · 2020-11-21 20:20:42 +05:30 · 2020-11-21 20:20:42 +05:30 · a0566bbf5c
commit a0566bbf5c
parent 3462ffa892
29 changed files with 559 additions and 360 deletions
--- a/test/parameters.json
+++ b/test/parameters.json
@ -37,7 +37,7 @@
    "writeinfojson": true, 
    "writesubtitles": false,
    "allsubtitles": false,
-    "listssubtitles": false,
+    "listsubtitles": false,
    "socket_timeout": 20,
    "fixup": "never"
 }
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
        self.assertEqual(downloaded['extractor'], 'testex')
        self.assertEqual(downloaded['extractor_key'], 'TestEx')
    # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
    def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
        # Feeds a three-entry playlist of url_transparent results through a
        # downloader configured with 'ignoreerrors' and the format selector
        # 'extra', then checks that entries 0 and 1 come back as None while
        # entry 2 is the only one recorded as downloaded.
        class _YDL(YDL):
            def __init__(self, *args, **kwargs):
                super(_YDL, self).__init__(*args, **kwargs)
            # Overridden as a no-op so that reported errors are discarded here.
            def trouble(self, s, tb=None):
                pass
        ydl = _YDL({
            'format': 'extra',
            'ignoreerrors': True,
        })
        # Extractor for 'video:<digits>' URLs. Id '0' raises ExtractorError,
        # id '2' offers an additional 'extra' format pointing at TEST_URL, and
        # any other id only offers the 'default' format.
        class VideoIE(InfoExtractor):
            _VALID_URL = r'video:(?P<id>\d+)'
            def _real_extract(self, url):
                video_id = self._match_id(url)
                formats = [{
                    'format_id': 'default',
                    'url': 'url:',
                }]
                if video_id == '0':
                    raise ExtractorError('foo')
                if video_id == '2':
                    formats.append({
                        'format_id': 'extra',
                        'url': TEST_URL,
                    })
                return {
                    'id': video_id,
                    'title': 'Video %s' % video_id,
                    'formats': formats,
                }
        # Extractor for 'playlist:' URLs whose entries come from a generator
        # (not a list); each entry is a url_transparent result routed to VideoIE.
        class PlaylistIE(InfoExtractor):
            _VALID_URL = r'playlist:'
            def _entries(self):
                for n in range(3):
                    video_id = compat_str(n)
                    yield {
                        '_type': 'url_transparent',
                        'ie_key': VideoIE.ie_key(),
                        'id': video_id,
                        'url': 'video:%s' % video_id,
                        'title': 'Video Transparent %s' % video_id,
                    }
            def _real_extract(self, url):
                return self.playlist_result(self._entries())
        ydl.add_info_extractor(VideoIE(ydl))
        ydl.add_info_extractor(PlaylistIE(ydl))
        info = ydl.extract_info('playlist:')
        entries = info['entries']
        # All three entries must be present in the result even though two failed.
        self.assertEqual(len(entries), 3)
        # Entry 0 raised ExtractorError inside VideoIE. Entry 1 only has the
        # 'default' format - presumably it fails the 'extra' format selection;
        # NOTE(review): confirm against the format selection code.
        self.assertTrue(entries[0] is None)
        self.assertTrue(entries[1] is None)
        self.assertEqual(len(ydl.downloaded_info_dicts), 1)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(entries[2], downloaded)
        # The surviving entry carries the URL of the 'extra' format from VideoIE
        # together with the id and title given by the transparent playlist entry.
        self.assertEqual(downloaded['url'], TEST_URL)
        self.assertEqual(downloaded['title'], 'Video Transparent 2')
        self.assertEqual(downloaded['id'], '2')
        self.assertEqual(downloaded['extractor'], 'Video')
        self.assertEqual(downloaded['extractor_key'], 'Video')
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@ -830,34 +830,23 @@ class YoutubeDL(object):
                                    'and will probably not work.')
            try:
-                try:
+                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
-                    temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+            except (AssertionError, IndexError, AttributeError):
-                except (AssertionError, IndexError, AttributeError):
+                temp_id = None
-                    temp_id = None
+            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
-                if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+                self.to_screen("[%s] %s: has already been recorded in archive" % (
-                    self.to_screen("[%s] %s: has already been recorded in archive" % (
+                               ie_key, temp_id))
-                                   ie_key, temp_id))
+                break
                    break
-                ie_result = ie.extract(url)
+            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
-                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
+
-                    break
+        else:
-                if isinstance(ie_result, list):
+            self.report_error('no suitable InfoExtractor for URL %s' % url)
-                    # Backwards compatibility: old IE result format
+
-                    ie_result = {
+    def __handle_extraction_exceptions(func):
-                        '_type': 'compat_list',
+        def wrapper(self, *args, **kwargs):
-                        'entries': ie_result,
+            try:
-                    }
+                return func(self, *args, **kwargs)
                if info_dict:
                    if info_dict.get('id'):
                        ie_result['id'] = info_dict['id']
                    if info_dict.get('title'):
                        ie_result['title'] = info_dict['title']
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
@ -865,20 +854,38 @@ class YoutubeDL(object):
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
                break
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        return wrapper
    # Runs a single extractor on a URL and normalises what it returns.
    # The call is wrapped by __handle_extraction_exceptions.
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        # A truthy 'id' / 'title' supplied in info_dict overwrites the value
        # produced by the extractor.
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        # With 'process' set the result is handed on to process_ie_result;
        # otherwise it is returned directly to the caller.
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
-            self.report_error('no suitable InfoExtractor for URL %s' % url)
+            return ie_result
    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
@ -1057,9 +1064,8 @@ class YoutubeDL(object):
                        self.to_screen('[download] ' + reason)
                        continue
-                entry_result = self.process_ie_result(entry,
+                entry_result = self.__process_iterable_entry(entry, download, extra)
-                                                      download=download,
+                # TODO: skip failed (empty) entries?
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@ -1088,6 +1094,11 @@ class YoutubeDL(object):
        else:
            raise Exception('Invalid result type: %s' % result_type)
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Processes one playlist entry through process_ie_result. Kept as its
        # own decorated method so the call runs inside the wrapper produced by
        # __handle_extraction_exceptions.
        entry_result = self.process_ie_result(
            entry,
            download=download,
            extra_info=extra_info,
        )
        return entry_result
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
--- a/youtube_dlc/compat.py
+++ b/youtube_dlc/compat.py
@ -2345,7 +2345,7 @@ except ImportError:  # Python <3.4
        # HTMLParseError has been deprecated in Python 3.3 and removed in
        # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
-        # and uniform cross-version exceptiong handling
+        # and uniform cross-version exception handling
        class compat_HTMLParseError(Exception):
            pass
--- a/youtube_dlc/downloader/http.py
+++ b/youtube_dlc/downloader/http.py
@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
                try:
                    ctx.data = self.ydl.urlopen(request)
                except (compat_urllib_error.URLError, ) as err:
-                    if isinstance(err.reason, socket.timeout):
+                    # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
                    reason = getattr(err, 'reason', None)
                    if isinstance(reason, socket.timeout):
                        raise RetryDownload(err)
                    raise err
                # When trying to resume, Content-Range HTTP header of response has to be checked
--- a/youtube_dlc/extractor/amara.py
+++ b/youtube_dlc/extractor/amara.py
@ -0,0 +1,103 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from .vimeo import VimeoIE
 from ..utils import (
    int_or_none,
    parse_iso8601,
    update_url_query,
 )
 class AmaraIE(InfoExtractor):
    # Extractor for amara.org video pages. Metadata and subtitle links come
    # from the Amara JSON API; when the first listed media URL is recognised
    # by YoutubeIE or VimeoIE the result is delegated to that extractor.
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]
    def _real_extract(self, url):
        # Look the video up in the Amara API by the id taken from the page URL.
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(api_url, video_id, query={'format': 'json'})
        title = meta['title']
        # Only the first entry of 'all_urls' is used as the media URL.
        video_url = meta['all_urls'][0]
        # Collect subtitles per language code; languages without a subtitles
        # URI or not flagged as published are left out.
        subtitles = {}
        for lang in meta.get('languages') or []:
            sub_uri = lang.get('subtitles_uri')
            if not sub_uri or not lang.get('published'):
                continue
            # A missing/empty language code is filed under 'en'.
            variants = subtitles.setdefault(lang.get('code') or 'en', [])
            variants.extend([
                {
                    'ext': sub_ext,
                    'url': update_url_query(sub_uri, {'format': sub_ext}),
                }
                for sub_ext in ('json', 'srt', 'vtt')])
        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }
        # Hand over to the first of YoutubeIE / VimeoIE that accepts the media
        # URL; otherwise the URL is returned as-is in the info dict.
        delegate = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if delegate is not None:
            info['_type'] = 'url_transparent'
            info['ie_key'] = delegate.ie_key()
        return info
--- a/youtube_dlc/extractor/brightcove.py
+++ b/youtube_dlc/extractor/brightcove.py
@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
    ]
    @classmethod
-    def _build_brighcove_url(cls, object_str):
+    def _build_brightcove_url(cls, object_str):
        """
        Build a Brightcove url from a xml string containing
        <object class="BrightcoveExperience">{params}</object>
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
        return cls._make_brightcove_url(params)
    @classmethod
-    def _build_brighcove_url_from_js(cls, object_js):
+    def _build_brightcove_url_from_js(cls, object_js):
        # The layout of JS is as follows:
        # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
        #   // build Brightcove <object /> XML
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
            ).+?>\s*</object>''',
            webpage)
        if matches:
-            return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+            return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
        matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
        if matches:
            return list(filter(None, [
-                cls._build_brighcove_url_from_js(custom_bc)
+                cls._build_brightcove_url_from_js(custom_bc)
                for custom_bc in matches]))
        return [src for _, src in re.findall(
            r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
--- a/youtube_dlc/extractor/common.py
+++ b/youtube_dlc/extractor/common.py
@ -1664,7 +1664,7 @@ class InfoExtractor(object):
        # just the media without qualities renditions.
        # Fortunately, master playlist can be easily distinguished from media
        # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
-        # master playlist tags MUST NOT appear in a media playist and vice versa.
+        # master playlist tags MUST NOT appear in a media playlist and vice versa.
        # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
        # media playlist and MUST NOT appear in master playlist thus we can
        # clearly detect media playlist with this criterion.
--- a/youtube_dlc/extractor/discoverynetworks.py
+++ b/youtube_dlc/extractor/discoverynetworks.py
@ -7,7 +7,7 @@ from .dplay import DPlayIE
 class DiscoveryNetworksDeIE(DPlayIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
    }, {
        'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
        'only_matching': True,
    }, {
        'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/europa.py
+++ b/youtube_dlc/extractor/europa.py
@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
        title = get_item('title', preferred_langs) or video_id
        description = get_item('description', preferred_langs)
-        thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
+        thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
        upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
        duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
        view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'description': description,
-            'thumbnail': thumbnmail,
+            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'duration': duration,
            'view_count': view_count,
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .amara import AmaraIE
 from .alura import (
    AluraIE,
    AluraCourseIE
@ -1507,7 +1508,6 @@ from .youtube import (
    YoutubeIE,
    YoutubeFavouritesIE,
    YoutubeHistoryIE,
    YoutubeLiveIE,
    YoutubeTabIE,
    YoutubePlaylistIE,
    YoutubeRecommendedIE,
--- a/youtube_dlc/extractor/francetv.py
+++ b/youtube_dlc/extractor/francetv.py
@ -211,7 +211,7 @@ class FranceTVIE(InfoExtractor):
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'description': clean_html(info.get('synopsis')),
-            'thumbnail': urljoin('http://pluzz.francetv.fr', info.get('image')),
+            'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
            'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
            'is_live': is_live,
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@ -842,7 +842,7 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            }
        },
-        # MTVSercices embed
+        # MTVServices embed
        {
            'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
            'md5': 'ca1aef97695ef2c1d6973256a57e5252',
--- a/youtube_dlc/extractor/googledrive.py
+++ b/youtube_dlc/extractor/googledrive.py
@ -3,11 +3,13 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_parse_qs
 from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    lowercase_escape,
    try_get,
    update_url_query,
 )
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
        # video can't be watched anonymously due to view count limit reached,
        # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
        'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
-        'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
+        'only_matching': True,
        'info_dict': {
            'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
            'ext': 'mp4',
            'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
        'info_dict': {
            'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
            'ext': 'mp4',
            'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
            'duration': 189,
        },
        'only_matching': True,
    }, {
        'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(
+        video_info = compat_parse_qs(self._download_webpage(
-            'http://docs.google.com/file/d/%s' % video_id, video_id)
+            'https://drive.google.com/get_video_info',
            video_id, query={'docid': video_id}))
-        title = self._search_regex(
+        def get_value(key):
-            r'"title"\s*,\s*"([^"]+)', webpage, 'title',
+            return try_get(video_info, lambda x: x[key][0])
-            default=None) or self._og_search_title(webpage)
+
-        duration = int_or_none(self._search_regex(
+        reason = get_value('reason')
-            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
+        title = get_value('title')
-            default=None))
+        if not title and reason:
            raise ExtractorError(reason, expected=True)
        formats = []
-        fmt_stream_map = self._search_regex(
+        fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
-            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
+        fmt_list = (get_value('fmt_list') or '').split(',')
            'fmt stream map', default='').split(',')
        fmt_list = self._search_regex(
            r'"fmt_list"\s*,\s*"([^"]+)', webpage,
            'fmt_list', default='').split(',')
        if fmt_stream_map and fmt_list:
            resolutions = {}
            for fmt in fmt_list:
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
                        if urlh and urlh.headers.get('Content-Disposition'):
                            add_source_format(urlh)
-        if not formats:
+        if not formats and reason:
-            reason = self._search_regex(
+            raise ExtractorError(reason, expected=True)
                r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
            if reason:
                raise ExtractorError(reason, expected=True)
        self._sort_formats(formats)
-        hl = self._search_regex(
+        hl = get_value('hl')
            r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
        subtitles_id = None
-        ttsurl = self._search_regex(
+        ttsurl = get_value('ttsurl')
            r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
        if ttsurl:
            # the video Id for subtitles will be the last value in the ttsurl
            # query string
@ -281,8 +265,8 @@ class GoogleDriveIE(InfoExtractor):
        return {
            'id': video_id,
            'title': title,
-            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
-            'duration': duration,
+            'duration': int_or_none(get_value('length_seconds')),
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
            'automatic_captions': self.extract_automatic_captions(
--- a/youtube_dlc/extractor/infoq.py
+++ b/youtube_dlc/extractor/infoq.py
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
    def _extract_rtmp_video(self, webpage):
        # The server URL is hardcoded
-        video_url = 'rtmpe://video.infoq.com/cfx/st/'
+        video_url = 'rtmpe://videof.infoq.com/cfx/st/'
        # Extract video URL
        encoded_id = self._search_regex(
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
        return [{
            'format_id': 'http_video',
            'url': http_video_url,
            'http_headers': {'Referer': 'https://www.infoq.com/'},
        }]
    def _extract_http_audio(self, webpage, video_id):
-        fields = self._hidden_inputs(webpage)
+        fields = self._form_hidden_inputs('mp3Form', webpage)
        http_audio_url = fields.get('filename')
        if not http_audio_url:
            return []
        # base URL is found in the Location header in the response returned by
        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
-        http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+        http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
        http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
        # audio file seem to be missing some times even if there is a download link
--- a/youtube_dlc/extractor/kusi.py
+++ b/youtube_dlc/extractor/kusi.py
@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
        description = xpath_text(doc, 'ABSTRACT')
        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
-        createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+        creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
        quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
        formats = []
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
            'duration': duration,
            'formats': formats,
            'thumbnail': thumbnail,
-            'timestamp': createtion_time,
+            'timestamp': creation_time,
        }
--- a/youtube_dlc/extractor/npr.py
+++ b/youtube_dlc/extractor/npr.py
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
            },
        }],
    }, {
-        # mutlimedia, not media title
+        # multimedia, not media title
        'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
        'info_dict': {
            'id': '533198237',
--- a/youtube_dlc/extractor/pbs.py
+++ b/youtube_dlc/extractor/pbs.py
@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
            if media_id:
                return media_id, presumptive_id, upload_date, description
-            # Fronline video embedded via flp
+            # Frontline video embedded via flp
            video_id = self._search_regex(
                r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
            if video_id:
--- a/youtube_dlc/extractor/rai.py
+++ b/youtube_dlc/extractor/rai.py
@ -16,8 +16,9 @@ from ..utils import (
    GeoRestrictedError,
    int_or_none,
    parse_duration,
    remove_start,
    strip_or_none,
-    unescapeHTML,
+    try_get,
    unified_strdate,
    unified_timestamp,
    update_url_query,
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
            # This does not imply geo restriction (e.g.
            # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
-            if media_url == 'http://download.rai.it/video_no_available.mp4':
+            if '/video_no_available.mp4' in media_url:
                continue
            ext = determine_ext(media_url)
@ -122,27 +123,8 @@ class RaiBaseIE(InfoExtractor):
 class RaiPlayIE(RaiBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.(?:html|json))' % RaiBaseIE._UUID_RE
+    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
    _TESTS = [{
        'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
        'md5': '340aa3b7afb54bfd14a8c11786450d76',
        'info_dict': {
            'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
            'ext': 'mp4',
            'title': 'La Casa Bianca',
            'alt_title': 'S2016 - Puntata del 23/10/2016',
            'description': 'md5:a09d45890850458077d1f68bb036e0a5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Rai 3',
            'creator': 'Rai 3',
            'duration': 3278,
            'timestamp': 1477764300,
            'upload_date': '20161029',
            'series': 'La Casa Bianca',
            'season': '2016',
        },
        'skip': 'This content is not available',
    }, {
        'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
        'md5': '8970abf8caf8aef4696e7b1f2adfc696',
        'info_dict': {
@ -166,10 +148,10 @@ class RaiPlayIE(RaiBaseIE):
    }]
    def _real_extract(self, url):
-        url, video_id = re.match(self._VALID_URL, url).groups()
+        base, video_id = re.match(self._VALID_URL, url).groups()
        media = self._download_json(
-            url.replace('.html', '.json'), video_id, 'Downloading video JSON')
+            base + '.json', video_id, 'Downloading video JSON')
        title = media['name']
        video = media['video']
@ -195,7 +177,8 @@ class RaiPlayIE(RaiBaseIE):
        season = media.get('season')
        info = {
-            'id': video_id,
+            'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
            'display_id': video_id,
            'title': self._live_title(title) if relinker_info.get(
                'is_live') else title,
            'alt_title': strip_or_none(media.get('subtitle')),
@ -217,16 +200,16 @@ class RaiPlayIE(RaiBaseIE):
        return info
-class RaiPlayLiveIE(RaiBaseIE):
+class RaiPlayLiveIE(RaiPlayIE):
-    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.raiplay.it/dirette/rainews24',
        'info_dict': {
            'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
            'display_id': 'rainews24',
            'ext': 'mp4',
            'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:6eca31500550f9376819f174e5644754',
+            'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
            'uploader': 'Rai News 24',
            'creator': 'Rai News 24',
            'is_live': True,
@ -234,58 +217,50 @@ class RaiPlayLiveIE(RaiBaseIE):
        'params': {
            'skip_download': True,
        },
-    }
+    }]
    def _real_extract(self, url):
        """Resolve a RaiPlay ``dirette`` page to its content item.

        Downloads the page at ``url``, reads the content item UUID from its
        ``data-uniquename`` attribute and returns a ``url_transparent``
        result pointing at the matching ``ContentItem-<uuid>.html`` URL,
        to be handled by ``RaiPlayIE``.
        """
        # The URL's id component is only used as the display id; the real
        # video id is the UUID scraped from the page below.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Capture the UUID that follows the "ContentItem-" prefix; the UUID
        # pattern itself comes from RaiBaseIE._UUID_RE.
        video_id = self._search_regex(
            r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
            webpage, 'content id')
        # NOTE(review): 'url_transparent' presumably lets the 'id' and
        # 'display_id' given here take precedence over the values produced
        # by the delegated RaiPlayIE extraction - confirm against the
        # result-processing code.
        return {
            '_type': 'url_transparent',
            'ie_key': RaiPlayIE.ie_key(),
            'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
            'id': video_id,
            'display_id': display_id,
        }
 class RaiPlayPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
        'info_dict': {
            'id': 'nondirloalmiocapo',
            'title': 'Non dirlo al mio capo',
-            'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
+            'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
        },
        'playlist_mincount': 12,
    }]
    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        base, playlist_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, playlist_id)
+        program = self._download_json(
-
+            base + '.json', playlist_id, 'Downloading program JSON')
        title = self._html_search_meta(
            ('programma', 'nomeProgramma'), webpage, 'title')
        description = unescapeHTML(self._html_search_meta(
            ('description', 'og:description'), webpage, 'description'))
        entries = []
-        for mobj in re.finditer(
+        for b in (program.get('blocks') or []):
-                r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
+            for s in (b.get('sets') or []):
-                webpage):
+                s_id = s.get('id')
-            video_url = urljoin(url, mobj.group('path'))
+                if not s_id:
-            entries.append(self.url_result(
+                    continue
-                video_url, ie=RaiPlayIE.ie_key(),
+                medias = self._download_json(
-                video_id=RaiPlayIE._match_id(video_url)))
+                    '%s/%s.json' % (base, s_id), s_id,
                    'Downloading content set JSON', fatal=False)
                if not medias:
                    continue
                for m in (medias.get('items') or []):
                    path_id = m.get('path_id')
                    if not path_id:
                        continue
                    video_url = urljoin(url, path_id)
                    entries.append(self.url_result(
                        video_url, ie=RaiPlayIE.ie_key(),
                        video_id=RaiPlayIE._match_id(video_url)))
-        return self.playlist_result(entries, playlist_id, title, description)
+        return self.playlist_result(
            entries, playlist_id, program.get('name'),
            try_get(program, lambda x: x['program_info']['description']))
 class RaiIE(RaiBaseIE):
@ -328,19 +303,6 @@ class RaiIE(RaiBaseIE):
            'duration': 2214,
            'upload_date': '20161103',
        }
    }, {
        # drawMediaRaiTV(...)
        'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
        'md5': '2dd727e61114e1ee9c47f0da6914e178',
        'info_dict': {
            'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
            'ext': 'mp4',
            'title': 'Il pacco',
            'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20141221',
        },
        'skip': 'This content is not available',
    }, {
        # initEdizione('ContentItem-...'
        'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
@ -352,18 +314,6 @@ class RaiIE(RaiBaseIE):
            'upload_date': '20170401',
        },
        'skip': 'Changes daily',
    }, {
        # HDS live stream with only relinker URL
        'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
        'info_dict': {
            'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
            'ext': 'flv',
            'title': 'EuroNews',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This content is available only in Italy',
    }, {
        # HLS live stream with ContentItem in og:url
        'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
@ -473,7 +423,7 @@ class RaiIE(RaiBaseIE):
            except ExtractorError:
                pass
-        relinker_url = self._search_regex(
+        relinker_url = self._proto_relative_url(self._search_regex(
            r'''(?x)
                (?:
                    var\s+videoURL|
@ -485,7 +435,7 @@ class RaiIE(RaiBaseIE):
                    //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
                    (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
            ''',
-            webpage, 'relinker URL', group='url')
+            webpage, 'relinker URL', group='url'))
        relinker_info = self._extract_relinker_info(
            urljoin(url, relinker_url), video_id)
--- a/youtube_dlc/extractor/soundcloud.py
+++ b/youtube_dlc/extractor/soundcloud.py
@ -649,7 +649,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
 class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
    def _extract_playlist(self, base_url, playlist_id, playlist_title):
-        # Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200.
+        # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
        # https://developers.soundcloud.com/blog/offset-pagination-deprecated
        COMMON_QUERY = {
            'limit': 200,
--- a/youtube_dlc/extractor/svt.py
+++ b/youtube_dlc/extractor/svt.py
@ -9,6 +9,7 @@ from ..utils import (
    determine_ext,
    dict_get,
    int_or_none,
    unified_timestamp,
    str_or_none,
    strip_or_none,
    try_get,
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
                    'format_id': player_type,
                    'url': vurl,
                })
-        if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
+        rights = try_get(video_info, lambda x: x['rights'], dict) or {}
        if not formats and rights.get('geoBlockedSweden'):
            self.raise_geo_restricted(
                'This video is only available in Sweden',
                countries=self._GEO_COUNTRIES)
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
        episode = video_info.get('episodeTitle')
        episode_number = int_or_none(video_info.get('episodeNumber'))
        timestamp = unified_timestamp(rights.get('validFrom'))
        duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
        age_limit = None
        adult = dict_get(
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'timestamp': timestamp,
            'age_limit': age_limit,
            'series': series,
            'season_number': season_number,
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
    IE_DESC = 'SVT Play and Öppet arkiv'
    _VALID_URL = r'''(?x)
                    (?:
-                        svt:(?P<svt_id>[^/?#&]+)|
+                        (?:
                            svt:|
                            https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
                        )
                        (?P<svt_id>[^/?#&]+)|
                        https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
                    )
                    '''
    _TESTS = [{
-        'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
+        'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
-        'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
+        'md5': '2382036fd6f8c994856c323fe51c426e',
        'info_dict': {
-            'id': '5996901',
+            'id': 'jNwpV9P',
            'ext': 'mp4',
-            'title': 'Flygplan till Haile Selassie',
+            'title': 'Det här är himlen',
-            'duration': 3527,
+            'timestamp': 1586044800,
-            'thumbnail': r're:^https?://.*[\.-]jpg$',
+            'upload_date': '20200405',
            'duration': 3515,
            'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
            'age_limit': 0,
            'subtitles': {
                'sv': [{
-                    'ext': 'wsrt',
+                    'ext': 'vtt',
                }]
            },
        },
        'params': {
            'format': 'bestvideo',
            # skip for now due to download test asserts that segment is > 10000 bytes and svt uses
            # init segments that are smaller
            # AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
            'skip_download': True,
        },
    }, {
        # geo restricted to Sweden
        'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
    }, {
        'url': 'svt:14278044',
        'only_matching': True,
    }, {
        'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
        'only_matching': True,
    }, {
        'url': 'svt:eWv5MLX',
        'only_matching': True,
    }]
    def _adjust_title(self, info):
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
                 r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
                webpage, 'video id')
-        return self._extract_by_video_id(svt_id, webpage)
+        info_dict = self._extract_by_video_id(svt_id, webpage)
        info_dict['thumbnail'] = thumbnail
        return info_dict
 class SVTSeriesIE(SVTPlayBaseIE):
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
    @classmethod
    def suitable(cls, url):
-        return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
+        return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
    def _real_extract(self, url):
        path, display_id = re.match(self._VALID_URL, url).groups()
--- a/youtube_dlc/extractor/tagesschau.py
+++ b/youtube_dlc/extractor/tagesschau.py
@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
        #     return self._extract_via_api(kind, video_id)
        # JSON api does not provide some audio formats (e.g. ogg) thus
-        # extractiong audio via webpage
+        # extracting audio via webpage
        webpage = self._download_webpage(url, video_id)
--- a/youtube_dlc/extractor/theplatform.py
+++ b/youtube_dlc/extractor/theplatform.py
@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
        if m:
            return [m.group('url')]
-        # Are whitesapces ignored in URLs?
+        # Are whitespaces ignored in URLs?
        # https://github.com/ytdl-org/youtube-dl/issues/12044
        matches = re.findall(
            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
--- a/youtube_dlc/extractor/turner.py
+++ b/youtube_dlc/extractor/turner.py
@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
        content_id = xpath_text(video_data, 'contentId') or video_id
        # rtmp_src = xpath_text(video_data, 'akamai/src')
        # if rtmp_src:
-        #     splited_rtmp_src = rtmp_src.split(',')
+        #     split_rtmp_src = rtmp_src.split(',')
-        #     if len(splited_rtmp_src) == 2:
+        #     if len(split_rtmp_src) == 2:
-        #         rtmp_src = splited_rtmp_src[1]
+        #         rtmp_src = split_rtmp_src[1]
        # aifp = xpath_text(video_data, 'akamai/aifp', default='')
        urls = []
--- a/youtube_dlc/extractor/viki.py
+++ b/youtube_dlc/extractor/viki.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import base64
 import hashlib
 import hmac
 import itertools
@ -9,6 +10,10 @@ import re
 import time
 from .common import InfoExtractor
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    ExtractorError,
    int_or_none,
@ -166,19 +171,20 @@ class VikiIE(VikiBaseIE):
    }, {
        # episode
        'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '5fa476a902e902783ac7a4d615cdbc7a',
+        'md5': '94e0e34fd58f169f40c184f232356cfe',
        'info_dict': {
            'id': '44699v',
            'ext': 'mp4',
            'title': 'Boys Over Flowers - Episode 1',
            'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
-            'duration': 4204,
+            'duration': 4172,
            'timestamp': 1270496524,
            'upload_date': '20100405',
            'uploader': 'group8',
            'like_count': int,
            'age_limit': 13,
-        }
+        },
        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
    }, {
        # youtube external
        'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -195,14 +201,15 @@ class VikiIE(VikiBaseIE):
            'uploader_id': 'ad14065n',
            'like_count': int,
            'age_limit': 13,
-        }
+        },
        'skip': 'Page not found!',
    }, {
        'url': 'http://www.viki.com/player/44699v',
        'only_matching': True,
    }, {
        # non-English description
        'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': '1713ae35df5a521b31f6dc40730e7c9c',
+        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
        'info_dict': {
            'id': '158036v',
            'ext': 'mp4',
@ -218,71 +225,11 @@ class VikiIE(VikiBaseIE):
    def _real_extract(self, url):