mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 10:07:08 -05:00 
			
		
		
		
	[khanacademy] Add support (Fixes #2066)
This commit is contained in:
		@@ -1,7 +1,6 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Allow direct execution
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
@@ -30,6 +29,7 @@ from youtube_dl.extractor import (
 | 
			
		||||
    SmotriUserIE,
 | 
			
		||||
    IviCompilationIE,
 | 
			
		||||
    ImdbListIE,
 | 
			
		||||
    KhanAcademyIE,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -198,6 +198,16 @@ class TestPlaylists(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(result['title'], u'Animated and Family Films')
 | 
			
		||||
        self.assertTrue(len(result['entries']) >= 48)
 | 
			
		||||
 | 
			
		||||
    def test_khanacademy_topic(self):
        """Extract a Khan Academy topic URL and verify the playlist result.

        Checks that the result is a playlist, that its id, title and
        description match the expected values for the cryptography topic,
        and that it contains at least three entries.
        """
        extractor = KhanAcademyIE(FakeYDL())
        playlist = extractor.extract(
            'https://www.khanacademy.org/math/applied-math/cryptography')

        self.assertIsPlaylist(playlist)
        self.assertEqual(playlist['id'], u'cryptography')
        self.assertEqual(playlist['title'], u'Journey into cryptography')
        self.assertEqual(
            playlist['description'],
            u'How have humans protected their secret messages through history? What has changed today?')
        # assertTrue (rather than a dedicated comparison assert) is what the
        # neighbouring tests in this file use for entry-count checks.
        self.assertTrue(3 <= len(playlist['entries']))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -98,6 +98,7 @@ from .justintv import JustinTVIE
 | 
			
		||||
from .jpopsukitv import JpopsukiIE
 | 
			
		||||
from .kankan import KankanIE
 | 
			
		||||
from .keezmovies import KeezMoviesIE
 | 
			
		||||
from .khanacademy import KhanAcademyIE
 | 
			
		||||
from .kickstarter import KickStarterIE
 | 
			
		||||
from .keek import KeekIE
 | 
			
		||||
from .liveleak import LiveLeakIE
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,5 @@
 | 
			
		||||
import base64
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import socket
 | 
			
		||||
@@ -260,6 +261,15 @@ class InfoExtractor(object):
 | 
			
		||||
            xml_string = transform_source(xml_string)
 | 
			
		||||
        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
 | 
			
		||||
 | 
			
		||||
    def _download_json(self, url_or_request, video_id,
 | 
			
		||||
                       note=u'Downloading JSON metadata',
 | 
			
		||||
                       errnote=u'Unable to download JSON metadata'):
 | 
			
		||||
        json_string = self._download_webpage(url_or_request, video_id, note, errnote)
 | 
			
		||||
        try:
 | 
			
		||||
            return json.loads(json_string)
 | 
			
		||||
        except ValueError as ve:
 | 
			
		||||
            raise ExtractorError('Failed to download JSON', cause=ve)
 | 
			
		||||
 | 
			
		||||
    def report_warning(self, msg, video_id=None):
 | 
			
		||||
        idstr = u'' if video_id is None else u'%s: ' % video_id
 | 
			
		||||
        self._downloader.report_warning(
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										71
									
								
								youtube_dl/extractor/khanacademy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/khanacademy.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,71 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class KhanAcademyIE(InfoExtractor):
    """Information extractor for khanacademy.org URLs.

    A URL whose first path component is ``video`` is resolved through the
    ``/api/v1/videos/`` endpoint into a single result; any other matching
    URL is treated as a topic and resolved through ``/api/v1/topic/`` into
    a playlist of its ``Video`` and ``Topic`` children.
    """

    _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
    IE_NAME = 'KhanAcademy'

    _TEST = {
        'url': 'http://www.khanacademy.org/video/one-time-pad',
        'file': 'one-time-pad.mp4',
        'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
        'info_dict': {
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'upload_date': '20120411',
        }
    }

    def _real_extract(self, url):
        """Dispatch on the URL's first path component (video vs. topic)."""
        mobj = re.match(self._VALID_URL, url)
        item_id = mobj.group('id')

        if mobj.group('key') != 'video':
            return self._khan_topic_playlist(item_id)
        return self._khan_video_info(item_id)

    def _khan_video_info(self, video_id):
        """Fetch the API record for one video and build its result dict."""
        api_url = 'http://api.khanacademy.org/api/v1/videos/' + video_id
        info = self._download_json(api_url, video_id, 'Downloading video info')

        date_added = unified_strdate(info['date_added'])
        authors = ', '.join(info['author_names'])

        # 'url' comes straight from the API record; the remaining fields
        # are the metadata read from that same record.
        result = {
            '_type': 'url_transparent',
            'url': info['url'],
            'id': video_id,
            'title': info['title'],
            'thumbnail': info['image_url'],
            'duration': info['duration'],
            'description': info['description'],
            'uploader': authors,
            'upload_date': date_added,
        }
        return result

    def _khan_topic_playlist(self, topic_id):
        """Fetch the API record for a topic and build a playlist from it."""
        api_url = 'http://api.khanacademy.org/api/v1/topic/' + topic_id
        topic = self._download_json(api_url, topic_id, 'Downloading topic info')

        playlist_items = []
        for child in topic['children']:
            # Only video and sub-topic children become playlist entries.
            if child['kind'] not in ('Video', 'Topic'):
                continue
            playlist_items.append({
                '_type': 'url',
                'url': child['url'],
                'id': child['id'],
                'title': child['title'],
            })

        return {
            '_type': 'playlist',
            'id': topic_id,
            'title': topic['title'],
            'description': topic['description'],
            'entries': playlist_items,
        }
 | 
			
		||||
		Reference in New Issue
	
	Block a user