mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 04:37:06 -05:00 
			
		
		
		
	[lynda] Convert to new subtitles system
This commit is contained in:
		@@ -18,6 +18,7 @@ from youtube_dl.extractor import (
 | 
				
			|||||||
    VimeoIE,
 | 
					    VimeoIE,
 | 
				
			||||||
    WallaIE,
 | 
					    WallaIE,
 | 
				
			||||||
    CeskaTelevizeIE,
 | 
					    CeskaTelevizeIE,
 | 
				
			||||||
 | 
					    LyndaIE,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -304,5 +305,17 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
 | 
				
			|||||||
        self.assertEqual(len(subtitles), 0)
 | 
					        self.assertEqual(len(subtitles), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestLyndaSubtitles(BaseTestSubtitles):
 | 
				
			||||||
 | 
					    url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
 | 
				
			||||||
 | 
					    IE = LyndaIE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_allsubtitles(self):
 | 
				
			||||||
 | 
					        self.DL.params['writesubtitles'] = True
 | 
				
			||||||
 | 
					        self.DL.params['allsubtitles'] = True
 | 
				
			||||||
 | 
					        subtitles = self.getSubtitles()
 | 
				
			||||||
 | 
					        self.assertEqual(set(subtitles.keys()), set(['en']))
 | 
				
			||||||
 | 
					        self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 | 
				
			|||||||
import re
 | 
					import re
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .subtitles import SubtitlesInfoExtractor
 | 
					 | 
				
			||||||
from .common import InfoExtractor
 | 
					from .common import InfoExtractor
 | 
				
			||||||
from ..compat import (
 | 
					from ..compat import (
 | 
				
			||||||
    compat_str,
 | 
					    compat_str,
 | 
				
			||||||
@@ -16,7 +15,7 @@ from ..utils import (
 | 
				
			|||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class LyndaIE(SubtitlesInfoExtractor):
 | 
					class LyndaIE(InfoExtractor):
 | 
				
			||||||
    IE_NAME = 'lynda'
 | 
					    IE_NAME = 'lynda'
 | 
				
			||||||
    IE_DESC = 'lynda.com videos'
 | 
					    IE_DESC = 'lynda.com videos'
 | 
				
			||||||
    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
 | 
					    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
 | 
				
			||||||
@@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
				
			|||||||
        self._check_formats(formats, video_id)
 | 
					        self._check_formats(formats, video_id)
 | 
				
			||||||
        self._sort_formats(formats)
 | 
					        self._sort_formats(formats)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if self._downloader.params.get('listsubtitles', False):
 | 
					        subtitles = self.extract_subtitles(video_id, page)
 | 
				
			||||||
            self._list_available_subtitles(video_id, page)
 | 
					 | 
				
			||||||
            return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
@@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor):
 | 
				
			|||||||
        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
 | 
					        if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
 | 
				
			||||||
            raise ExtractorError('Unable to log in')
 | 
					            raise ExtractorError('Unable to log in')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _fix_subtitles(self, subtitles):
 | 
					    def _fix_subtitles(self, subs):
 | 
				
			||||||
        if subtitles is None:
 | 
					        srt = ''
 | 
				
			||||||
            return subtitles  # subtitles not requested
 | 
					        for pos in range(0, len(subs) - 1):
 | 
				
			||||||
 | 
					            seq_current = subs[pos]
 | 
				
			||||||
        fixed_subtitles = {}
 | 
					            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
 | 
				
			||||||
        for k, v in subtitles.items():
 | 
					            if m_current is None:
 | 
				
			||||||
            subs = json.loads(v)
 | 
					 | 
				
			||||||
            if len(subs) == 0:
 | 
					 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
            srt = ''
 | 
					            seq_next = subs[pos + 1]
 | 
				
			||||||
            for pos in range(0, len(subs) - 1):
 | 
					            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
 | 
				
			||||||
                seq_current = subs[pos]
 | 
					            if m_next is None:
 | 
				
			||||||
                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
 | 
					                continue
 | 
				
			||||||
                if m_current is None:
 | 
					            appear_time = m_current.group('timecode')
 | 
				
			||||||
                    continue
 | 
					            disappear_time = m_next.group('timecode')
 | 
				
			||||||
                seq_next = subs[pos + 1]
 | 
					            text = seq_current['Caption']
 | 
				
			||||||
                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
 | 
					            srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
 | 
				
			||||||
                if m_next is None:
 | 
					        if srt:
 | 
				
			||||||
                    continue
 | 
					            return srt
 | 
				
			||||||
                appear_time = m_current.group('timecode')
 | 
					 | 
				
			||||||
                disappear_time = m_next.group('timecode')
 | 
					 | 
				
			||||||
                text = seq_current['Caption']
 | 
					 | 
				
			||||||
                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
 | 
					 | 
				
			||||||
            if srt:
 | 
					 | 
				
			||||||
                fixed_subtitles[k] = srt
 | 
					 | 
				
			||||||
        return fixed_subtitles
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _get_available_subtitles(self, video_id, webpage):
 | 
					    def _get_subtitles(self, video_id, webpage):
 | 
				
			||||||
        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 | 
					        url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 | 
				
			||||||
        sub = self._download_webpage(url, None, False)
 | 
					        subs = self._download_json(url, None, False)
 | 
				
			||||||
        sub_json = json.loads(sub)
 | 
					        if subs:
 | 
				
			||||||
        return {'en': url} if len(sub_json) > 0 else {}
 | 
					            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class LyndaCourseIE(InfoExtractor):
 | 
					class LyndaCourseIE(InfoExtractor):
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user