mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:17:07 -05:00 
			
		
		
		
	[bbccouk] Convert to new subtitles system
I haven't found any video available outside the UK, so I haven't added tests. I have updated how the srt file is built, because (at least for www.bbc.co.uk/programmes/p02j9b69) the subtitles are inside 'span' elements.
This commit is contained in:
		@@ -2,12 +2,12 @@ from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .subtitles import SubtitlesInfoExtractor
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import ExtractorError
 | 
			
		||||
from ..compat import compat_HTTPError
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BBCCoUkIE(SubtitlesInfoExtractor):
 | 
			
		||||
class BBCCoUkIE(InfoExtractor):
 | 
			
		||||
    IE_NAME = 'bbc.co.uk'
 | 
			
		||||
    IE_DESC = 'BBC iPlayer'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
 | 
			
		||||
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 | 
			
		||||
            formats.extend(conn_formats)
 | 
			
		||||
        return formats
 | 
			
		||||
 | 
			
		||||
    def _extract_captions(self, media, programme_id):
 | 
			
		||||
    def _get_subtitles(self, media, programme_id):
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        for connection in self._extract_connections(media):
 | 
			
		||||
            captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
 | 
			
		||||
            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
 | 
			
		||||
            ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
 | 
			
		||||
            srt = ''
 | 
			
		||||
 | 
			
		||||
            def _extract_text(p):
 | 
			
		||||
                if p.text is not None:
 | 
			
		||||
                    stripped_text = p.text.strip()
 | 
			
		||||
                    if stripped_text:
 | 
			
		||||
                        return stripped_text
 | 
			
		||||
                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
 | 
			
		||||
            for pos, p in enumerate(ps):
 | 
			
		||||
                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
 | 
			
		||||
                                                          p.text.strip() if p.text is not None else '')
 | 
			
		||||
            subtitles[lang] = srt
 | 
			
		||||
                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
 | 
			
		||||
            subtitles[lang] = [
 | 
			
		||||
                {
 | 
			
		||||
                    'url': connection.get('href'),
 | 
			
		||||
                    'ext': 'ttml',
 | 
			
		||||
                },
 | 
			
		||||
                {
 | 
			
		||||
                    'data': srt,
 | 
			
		||||
                    'ext': 'srt',
 | 
			
		||||
                },
 | 
			
		||||
            ]
 | 
			
		||||
        return subtitles
 | 
			
		||||
 | 
			
		||||
    def _download_media_selector(self, programme_id):
 | 
			
		||||
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 | 
			
		||||
            elif kind == 'video':
 | 
			
		||||
                formats.extend(self._extract_video(media, programme_id))
 | 
			
		||||
            elif kind == 'captions':
 | 
			
		||||
                subtitles = self._extract_captions(media, programme_id)
 | 
			
		||||
                subtitles = self.extract_subtitles(media, programme_id)
 | 
			
		||||
 | 
			
		||||
        return formats, subtitles
 | 
			
		||||
 | 
			
		||||
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 | 
			
		||||
        else:
 | 
			
		||||
            programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 | 
			
		||||
 | 
			
		||||
        if self._downloader.params.get('listsubtitles', False):
 | 
			
		||||
            self._list_available_subtitles(programme_id, subtitles)
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user