mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 20:07:08 -05:00 
			
		
		
		
	Merge branch 'master' into subtitles_rework
This commit is contained in:
		
							
								
								
									
										19
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								README.md
									
									
									
									
									
								
							@@ -113,25 +113,28 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
 | 
			
		||||
## Video Format Options:
 | 
			
		||||
    -f, --format FORMAT        video format code, specify the order of
 | 
			
		||||
                               preference using slashes: "-f 22/17/18"
 | 
			
		||||
                               preference using slashes: "-f 22/17/18". "-f mp4"
 | 
			
		||||
                               and "-f flv" are also supported
 | 
			
		||||
    --all-formats              download all available video formats
 | 
			
		||||
    --prefer-free-formats      prefer free video formats unless a specific one
 | 
			
		||||
                               is requested
 | 
			
		||||
    --max-quality FORMAT       highest quality format to download
 | 
			
		||||
    -F, --list-formats         list all available formats (currently youtube
 | 
			
		||||
                               only)
 | 
			
		||||
 | 
			
		||||
## Subtitle Options:
 | 
			
		||||
    --write-sub                write subtitle file (currently youtube only)
 | 
			
		||||
    --write-auto-sub           write automatic subtitle file (currently youtube
 | 
			
		||||
                               only)
 | 
			
		||||
    --only-sub                 [deprecated] alias of --skip-download
 | 
			
		||||
    --all-subs                 downloads all the available subtitles of the
 | 
			
		||||
                               video (currently youtube only)
 | 
			
		||||
                               video
 | 
			
		||||
    --list-subs                lists all available subtitles for the video
 | 
			
		||||
                               (currently youtube only)
 | 
			
		||||
    --sub-format FORMAT        subtitle format [srt/sbv/vtt] (default=srt)
 | 
			
		||||
                               (currently youtube only)
 | 
			
		||||
    --sub-lang LANG            language of the subtitles to download (optional)
 | 
			
		||||
                               use IETF language tags like 'en'
 | 
			
		||||
    --sub-format FORMAT        subtitle format (default=srt) ([sbv/vtt] youtube
 | 
			
		||||
                               only)
 | 
			
		||||
    --sub-lang LANGS           languages of the subtitles to download (optional)
 | 
			
		||||
                               separated by commas, use IETF language tags like
 | 
			
		||||
                               'en,pt'
 | 
			
		||||
 | 
			
		||||
## Authentication Options:
 | 
			
		||||
    -u, --username USERNAME    account username
 | 
			
		||||
@@ -153,6 +156,8 @@ which means you can modify it, redistribute it or use it however you like.
 | 
			
		||||
                               processing; the video is erased by default
 | 
			
		||||
    --no-post-overwrites       do not overwrite post-processed files; the post-
 | 
			
		||||
                               processed files are overwritten by default
 | 
			
		||||
    --embed-subs               embed subtitles in the video (only for mp4
 | 
			
		||||
                               videos)
 | 
			
		||||
 | 
			
		||||
# CONFIGURATION
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -4,8 +4,12 @@ __youtube-dl()
 | 
			
		||||
    COMPREPLY=()
 | 
			
		||||
    cur="${COMP_WORDS[COMP_CWORD]}"
 | 
			
		||||
    opts="{{flags}}"
 | 
			
		||||
    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater"
 | 
			
		||||
 | 
			
		||||
    if [[ ${cur} == * ]] ; then
 | 
			
		||||
    if [[ ${cur} =~ : ]]; then
 | 
			
		||||
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
 | 
			
		||||
        return 0
 | 
			
		||||
    elif [[ ${cur} == * ]] ; then
 | 
			
		||||
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
 | 
			
		||||
        return 0
 | 
			
		||||
    fi
 | 
			
		||||
 
 | 
			
		||||
@@ -6,28 +6,35 @@ import hashlib
 | 
			
		||||
import urllib.request
 | 
			
		||||
 | 
			
		||||
if len(sys.argv) <= 1:
 | 
			
		||||
	print('Specify the version number as parameter')
 | 
			
		||||
	sys.exit()
 | 
			
		||||
    print('Specify the version number as parameter')
 | 
			
		||||
    sys.exit()
 | 
			
		||||
version = sys.argv[1]
 | 
			
		||||
 | 
			
		||||
with open('update/LATEST_VERSION', 'w') as f:
 | 
			
		||||
	f.write(version)
 | 
			
		||||
    f.write(version)
 | 
			
		||||
 | 
			
		||||
versions_info = json.load(open('update/versions.json'))
 | 
			
		||||
if 'signature' in versions_info:
 | 
			
		||||
	del versions_info['signature']
 | 
			
		||||
    del versions_info['signature']
 | 
			
		||||
 | 
			
		||||
new_version = {}
 | 
			
		||||
 | 
			
		||||
filenames = {'bin': 'youtube-dl', 'exe': 'youtube-dl.exe', 'tar': 'youtube-dl-%s.tar.gz' % version}
 | 
			
		||||
filenames = {
 | 
			
		||||
    'bin': 'youtube-dl',
 | 
			
		||||
    'exe': 'youtube-dl.exe',
 | 
			
		||||
    'tar': 'youtube-dl-%s.tar.gz' % version}
 | 
			
		||||
build_dir = os.path.join('..', '..', 'build', version)
 | 
			
		||||
for key, filename in filenames.items():
 | 
			
		||||
	print('Downloading and checksumming %s...' %filename)
 | 
			
		||||
	url = 'http://youtube-dl.org/downloads/%s/%s' % (version, filename)
 | 
			
		||||
	data = urllib.request.urlopen(url).read()
 | 
			
		||||
	sha256sum = hashlib.sha256(data).hexdigest()
 | 
			
		||||
	new_version[key] = (url, sha256sum)
 | 
			
		||||
    fn = os.path.join(build_dir, filename)
 | 
			
		||||
    with open(fn, 'rb') as f:
 | 
			
		||||
        data = f.read()
 | 
			
		||||
    if not data:
 | 
			
		||||
        raise ValueError('File %s is empty!' % fn)
 | 
			
		||||
    sha256sum = hashlib.sha256(data).hexdigest()
 | 
			
		||||
    new_version[key] = (url, sha256sum)
 | 
			
		||||
 | 
			
		||||
versions_info['versions'][version] = new_version
 | 
			
		||||
versions_info['latest'] = version
 | 
			
		||||
 | 
			
		||||
json.dump(versions_info, open('update/versions.json', 'w'), indent=4, sort_keys=True)
 | 
			
		||||
with open('update/versions.json', 'w') as jsonf:
 | 
			
		||||
    json.dump(versions_info, jsonf, indent=4, sort_keys=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -22,7 +22,7 @@ entry_template=textwrap.dedent("""
 | 
			
		||||
									<atom:link href="http://rg3.github.io/youtube-dl" />
 | 
			
		||||
									<atom:content type="xhtml">
 | 
			
		||||
										<div xmlns="http://www.w3.org/1999/xhtml">
 | 
			
		||||
											Downloads available at <a href="http://youtube-dl.org/downloads/@VERSION@/">http://youtube-dl.org/downloads/@VERSION@/</a>
 | 
			
		||||
											Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
 | 
			
		||||
										</div>
 | 
			
		||||
									</atom:content>
 | 
			
		||||
									<atom:author>
 | 
			
		||||
@@ -54,4 +54,3 @@ atom_template = atom_template.replace('@ENTRIES@', entries_str)
 | 
			
		||||
with open('update/releases.atom','w',encoding='utf-8') as atom_file:
 | 
			
		||||
	atom_file.write(atom_template)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										33
									
								
								devscripts/gh-pages/update-sites.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										33
									
								
								devscripts/gh-pages/update-sites.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,33 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
import os
 | 
			
		||||
import textwrap
 | 
			
		||||
 | 
			
		||||
# We must be able to import youtube_dl
 | 
			
		||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 | 
			
		||||
 | 
			
		||||
import youtube_dl
 | 
			
		||||
 | 
			
		||||
def main():
    """Regenerate supportedsites.html from its template.

    Reads 'supportedsites.html.in' from the current working directory,
    builds one <li> entry per extractor returned by
    youtube_dl.gen_extractors() (sorted by IE_NAME), substitutes the
    tab-indented list for the '@SITES@' placeholder and writes the result
    to 'supportedsites.html'.
    """
    # Local import: only this function needs it, and it keeps the edit
    # self-contained.
    import html

    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
        # Names and descriptions are plain text, so escape them before
        # embedding them in markup.
        ie_html = '<b>{}</b>'.format(html.escape(ie.IE_NAME, quote=False))
        try:
            ie_desc = ie.IE_DESC
        except AttributeError:
            # Not every extractor provides a description; omit it then.
            pass
        else:
            ie_html += ': {}'.format(html.escape(str(ie_desc), quote=False))
        if not ie.working():
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    main()
 | 
			
		||||
@@ -67,7 +67,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
 | 
			
		||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
 | 
			
		||||
git checkout HEAD -- youtube-dl youtube-dl.exe
 | 
			
		||||
 | 
			
		||||
/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
 | 
			
		||||
/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..."
 | 
			
		||||
for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
 | 
			
		||||
scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
 | 
			
		||||
ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
 | 
			
		||||
@@ -85,6 +85,7 @@ ROOT=$(pwd)
 | 
			
		||||
    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
 | 
			
		||||
    "$ROOT/devscripts/gh-pages/generate-download.py"
 | 
			
		||||
    "$ROOT/devscripts/gh-pages/update-copyright.py"
 | 
			
		||||
    "$ROOT/devscripts/gh-pages/update-sites.py"
 | 
			
		||||
    git add *.html *.html.in update
 | 
			
		||||
    git commit -m "release $version"
 | 
			
		||||
    git show HEAD
 | 
			
		||||
 
 | 
			
		||||
@@ -14,21 +14,21 @@ tests = [
 | 
			
		||||
    # 89 
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
 | 
			
		||||
     "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
 | 
			
		||||
    # 88
 | 
			
		||||
    # 88 - vflapUV9V 2013/08/28
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
 | 
			
		||||
     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
 | 
			
		||||
    # 87
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
 | 
			
		||||
     "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
 | 
			
		||||
    # 86
 | 
			
		||||
    # 86 - vfluy6kdb 2013/09/06
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
 | 
			
		||||
     "yuioplkjhgfdsazecvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<"),
 | 
			
		||||
     "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
 | 
			
		||||
    # 85
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
 | 
			
		||||
     ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
 | 
			
		||||
    # 84
 | 
			
		||||
    # 84 - vflg0g8PQ 2013/08/29 (sporadic)
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
 | 
			
		||||
     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
 | 
			
		||||
     ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
 | 
			
		||||
    # 83
 | 
			
		||||
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
 | 
			
		||||
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
 | 
			
		||||
 
 | 
			
		||||
@@ -11,24 +11,49 @@ from youtube_dl.extractor import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE,
 | 
			
		||||
from helper import get_testcases
 | 
			
		||||
 | 
			
		||||
class TestAllURLsMatching(unittest.TestCase):
 | 
			
		||||
    def setUp(self):
 | 
			
		||||
        self.ies = gen_extractors()
 | 
			
		||||
 | 
			
		||||
    def matching_ies(self, url):
        """Return the IE_NAME of each extractor in self.ies that accepts url.

        The catch-all extractor named 'generic' is left out of the result.
        """
        names = []
        for extractor in self.ies:
            if not extractor.suitable(url):
                continue
            if extractor.IE_NAME == 'generic':
                continue
            names.append(extractor.IE_NAME)
        return names
 | 
			
		||||
 | 
			
		||||
    def assertMatch(self, url, ie_list):
 | 
			
		||||
        self.assertEqual(self.matching_ies(url), ie_list)
 | 
			
		||||
 | 
			
		||||
    def test_youtube_playlist_matching(self):
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
 | 
			
		||||
        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
 | 
			
		||||
        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
 | 
			
		||||
        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
 | 
			
		||||
        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
 | 
			
		||||
        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
 | 
			
		||||
        assertPlaylist(u'PL63F0C78739B09958')
 | 
			
		||||
        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
 | 
			
		||||
        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
 | 
			
		||||
        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
 | 
			
		||||
        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
 | 
			
		||||
        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
 | 
			
		||||
 | 
			
		||||
    def test_youtube_matching(self):
 | 
			
		||||
        self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
 | 
			
		||||
        self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
 | 
			
		||||
        self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
 | 
			
		||||
        self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 | 
			
		||||
 | 
			
		||||
    def test_youtube_channel_matching(self):
 | 
			
		||||
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
 | 
			
		||||
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
 | 
			
		||||
        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 | 
			
		||||
        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
 | 
			
		||||
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
 | 
			
		||||
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
 | 
			
		||||
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
 | 
			
		||||
 | 
			
		||||
    def test_youtube_user_matching(self):
 | 
			
		||||
        self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
 | 
			
		||||
 | 
			
		||||
    def test_youtube_feeds(self):
 | 
			
		||||
        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
 | 
			
		||||
        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
 | 
			
		||||
        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
 | 
			
		||||
        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
 | 
			
		||||
 | 
			
		||||
    def test_youtube_show_matching(self):
 | 
			
		||||
        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
 | 
			
		||||
 | 
			
		||||
    def test_justin_tv_channelid_matching(self):
 | 
			
		||||
        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
 | 
			
		||||
@@ -63,15 +88,12 @@ class TestAllURLsMatching(unittest.TestCase):
 | 
			
		||||
                    self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url))
 | 
			
		||||
 | 
			
		||||
    def test_keywords(self):
 | 
			
		||||
        ies = gen_extractors()
 | 
			
		||||
        matching_ies = lambda url: [ie.IE_NAME for ie in ies
 | 
			
		||||
                                    if ie.suitable(url) and ie.IE_NAME != 'generic']
 | 
			
		||||
        self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions'])
 | 
			
		||||
        self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions'])
 | 
			
		||||
        self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral'])
 | 
			
		||||
        self.assertEqual(matching_ies(':tds'), ['ComedyCentral'])
 | 
			
		||||
        self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral'])
 | 
			
		||||
        self.assertEqual(matching_ies(':cr'), ['ComedyCentral'])
 | 
			
		||||
        self.assertMatch(':ytsubs', ['youtube:subscriptions'])
 | 
			
		||||
        self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
 | 
			
		||||
        self.assertMatch(':thedailyshow', ['ComedyCentral'])
 | 
			
		||||
        self.assertMatch(':tds', ['ComedyCentral'])
 | 
			
		||||
        self.assertMatch(':colbertreport', ['ComedyCentral'])
 | 
			
		||||
        self.assertMatch(':cr', ['ComedyCentral'])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
 
 | 
			
		||||
@@ -127,12 +127,11 @@ def generator(test_case):
 | 
			
		||||
                    info_dict = json.load(infof)
 | 
			
		||||
                for (info_field, expected) in tc.get('info_dict', {}).items():
 | 
			
		||||
                    if isinstance(expected, compat_str) and expected.startswith('md5:'):
 | 
			
		||||
                        self.assertEqual(expected, 'md5:' + md5(info_dict.get(info_field)))
 | 
			
		||||
                        got = 'md5:' + md5(info_dict.get(info_field))
 | 
			
		||||
                    else:
 | 
			
		||||
                        got = info_dict.get(info_field)
 | 
			
		||||
                        self.assertEqual(
 | 
			
		||||
                            expected, got,
 | 
			
		||||
                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 | 
			
		||||
                    self.assertEqual(expected, got,
 | 
			
		||||
                        u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
 | 
			
		||||
 | 
			
		||||
                # If checkable fields are missing from the test case, print the info_dict
 | 
			
		||||
                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
 | 
			
		||||
 
 | 
			
		||||
@@ -63,6 +63,17 @@ class FileDownloader(object):
 | 
			
		||||
        converted = float(bytes) / float(1024 ** exponent)
 | 
			
		||||
        return '%.2f%s' % (converted, suffix)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def format_seconds(seconds):
 | 
			
		||||
        (mins, secs) = divmod(seconds, 60)
 | 
			
		||||
        (hours, eta_mins) = divmod(mins, 60)
 | 
			
		||||
        if hours > 99:
 | 
			
		||||
            return '--:--:--'
 | 
			
		||||
        if hours == 0:
 | 
			
		||||
            return '%02d:%02d' % (mins, secs)
 | 
			
		||||
        else:
 | 
			
		||||
            return '%02d:%02d:%02d' % (hours, mins, secs)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def calc_percent(byte_counter, data_len):
 | 
			
		||||
        if data_len is None:
 | 
			
		||||
@@ -78,14 +89,7 @@ class FileDownloader(object):
 | 
			
		||||
            return '--:--'
 | 
			
		||||
        rate = float(current) / dif
 | 
			
		||||
        eta = int((float(total) - float(current)) / rate)
 | 
			
		||||
        (eta_mins, eta_secs) = divmod(eta, 60)
 | 
			
		||||
        (eta_hours, eta_mins) = divmod(eta_mins, 60)
 | 
			
		||||
        if eta_hours > 99:
 | 
			
		||||
            return '--:--:--'
 | 
			
		||||
        if eta_hours == 0:
 | 
			
		||||
            return '%02d:%02d' % (eta_mins, eta_secs)
 | 
			
		||||
        else:
 | 
			
		||||
            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
 | 
			
		||||
        return FileDownloader.format_seconds(eta)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def calc_speed(start, now, bytes):
 | 
			
		||||
@@ -234,12 +238,14 @@ class FileDownloader(object):
 | 
			
		||||
        """Report it was impossible to resume download."""
 | 
			
		||||
        self.to_screen(u'[download] Unable to resume')
 | 
			
		||||
 | 
			
		||||
    def report_finish(self):
 | 
			
		||||
    def report_finish(self, data_len_str, tot_time):
 | 
			
		||||
        """Report download finished."""
 | 
			
		||||
        if self.params.get('noprogress', False):
 | 
			
		||||
            self.to_screen(u'[download] Download completed')
 | 
			
		||||
        else:
 | 
			
		||||
            self.to_screen(u'')
 | 
			
		||||
            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
 | 
			
		||||
            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
 | 
			
		||||
                (clear_line, data_len_str, self.format_seconds(tot_time)))
 | 
			
		||||
 | 
			
		||||
    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
@@ -542,7 +548,7 @@ class FileDownloader(object):
 | 
			
		||||
            self.report_error(u'Did not get any data blocks')
 | 
			
		||||
            return False
 | 
			
		||||
        stream.close()
 | 
			
		||||
        self.report_finish()
 | 
			
		||||
        self.report_finish(data_len_str, (time.time() - start))
 | 
			
		||||
        if data_len is not None and byte_counter != data_len:
 | 
			
		||||
            raise ContentTooShortError(byte_counter, int(data_len))
 | 
			
		||||
        self.try_rename(tmpfilename, filename)
 | 
			
		||||
 
 | 
			
		||||
@@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 | 
			
		||||
        try:
 | 
			
		||||
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
 | 
			
		||||
        except FFmpegPostProcessorError as err:
 | 
			
		||||
            raise AudioConversionError(err.message)
 | 
			
		||||
            raise AudioConversionError(err.msg)
 | 
			
		||||
 | 
			
		||||
    def run(self, information):
 | 
			
		||||
        path = information['filepath']
 | 
			
		||||
@@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
 | 
			
		||||
        except:
 | 
			
		||||
            etype,e,tb = sys.exc_info()
 | 
			
		||||
            if isinstance(e, AudioConversionError):
 | 
			
		||||
                msg = u'audio conversion failed: ' + e.message
 | 
			
		||||
                msg = u'audio conversion failed: ' + e.msg
 | 
			
		||||
            else:
 | 
			
		||||
                msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
 | 
			
		||||
            raise PostProcessingError(msg)
 | 
			
		||||
@@ -458,6 +458,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 | 
			
		||||
        opts.extend(['-f', 'mp4'])
 | 
			
		||||
 | 
			
		||||
        temp_filename = filename + u'.temp'
 | 
			
		||||
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
 | 
			
		||||
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
 | 
			
		||||
        os.remove(encodeFilename(filename))
 | 
			
		||||
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 | 
			
		||||
 
 | 
			
		||||
@@ -76,7 +76,7 @@ class YoutubeDL(object):
 | 
			
		||||
    allsubtitles:      Downloads all the subtitles of the video
 | 
			
		||||
    listsubtitles:     Lists all available subtitles for the video
 | 
			
		||||
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 | 
			
		||||
    subtitleslangs:     Language of the subtitles to download
 | 
			
		||||
    subtitleslangs:    List of languages of the subtitles to download
 | 
			
		||||
    keepvideo:         Keep the video file after post-processing
 | 
			
		||||
    daterange:         A DateRange object, download only if the upload_date is in the range.
 | 
			
		||||
    skip_download:     Skip the actual download of the video file
 | 
			
		||||
@@ -97,6 +97,7 @@ class YoutubeDL(object):
 | 
			
		||||
    def __init__(self, params):
 | 
			
		||||
        """Create a FileDownloader object with the given options."""
 | 
			
		||||
        self._ies = []
 | 
			
		||||
        self._ies_instances = {}
 | 
			
		||||
        self._pps = []
 | 
			
		||||
        self._progress_hooks = []
 | 
			
		||||
        self._download_retcode = 0
 | 
			
		||||
@@ -111,8 +112,21 @@ class YoutubeDL(object):
 | 
			
		||||
    def add_info_extractor(self, ie):
 | 
			
		||||
        """Add an InfoExtractor object to the end of the list."""
 | 
			
		||||
        self._ies.append(ie)
 | 
			
		||||
        self._ies_instances[ie.ie_key()] = ie
 | 
			
		||||
        ie.set_downloader(self)
 | 
			
		||||
 | 
			
		||||
    def get_info_extractor(self, ie_key):
        """
        Return the extractor instance registered under ie_key.

        The instance is looked up in self._ies_instances; when none is
        stored there, a new one is created via the module-level
        get_info_extractor(), registered with add_info_extractor() and
        returned.
        """
        cached = self._ies_instances.get(ie_key)
        if cached is not None:
            return cached
        fresh = get_info_extractor(ie_key)()
        self.add_info_extractor(fresh)
        return fresh
 | 
			
		||||
 | 
			
		||||
    def add_default_info_extractors(self):
 | 
			
		||||
        """
 | 
			
		||||
        Add the InfoExtractors returned by gen_extractors to the end of the list
 | 
			
		||||
@@ -294,9 +308,7 @@ class YoutubeDL(object):
 | 
			
		||||
         '''
 | 
			
		||||
        
 | 
			
		||||
        if ie_key:
 | 
			
		||||
            ie = get_info_extractor(ie_key)()
 | 
			
		||||
            ie.set_downloader(self)
 | 
			
		||||
            ies = [ie]
 | 
			
		||||
            ies = [self.get_info_extractor(ie_key)]
 | 
			
		||||
        else:
 | 
			
		||||
            ies = self._ies
 | 
			
		||||
 | 
			
		||||
@@ -448,7 +460,8 @@ class YoutubeDL(object):
 | 
			
		||||
        if self.params.get('forceid', False):
 | 
			
		||||
            compat_print(info_dict['id'])
 | 
			
		||||
        if self.params.get('forceurl', False):
 | 
			
		||||
            compat_print(info_dict['url'])
 | 
			
		||||
            # For RTMP URLs, also include the playpath
 | 
			
		||||
            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 | 
			
		||||
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 | 
			
		||||
            compat_print(info_dict['thumbnail'])
 | 
			
		||||
        if self.params.get('forcedescription', False) and 'description' in info_dict:
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,7 @@ __authors__  = (
 | 
			
		||||
    'Axel Noack',
 | 
			
		||||
    'Albert Kim',
 | 
			
		||||
    'Pierre Rudloff',
 | 
			
		||||
    'Huarong Huo',
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
__license__ = 'Public Domain'
 | 
			
		||||
@@ -45,6 +46,7 @@ import sys
 | 
			
		||||
import warnings
 | 
			
		||||
import platform
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from .utils import *
 | 
			
		||||
from .update import update_self
 | 
			
		||||
from .version import __version__
 | 
			
		||||
@@ -99,6 +101,16 @@ def parseOpts(overrideArguments=None):
 | 
			
		||||
            pass
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def _hide_login_info(opts):
 | 
			
		||||
        opts = list(opts)
 | 
			
		||||
        for private_opt in ['-p', '--password', '-u', '--username']:
 | 
			
		||||
            try:
 | 
			
		||||
                i = opts.index(private_opt)
 | 
			
		||||
                opts[i+1] = '<PRIVATE>'
 | 
			
		||||
            except ValueError:
 | 
			
		||||
                pass
 | 
			
		||||
        return opts
 | 
			
		||||
 | 
			
		||||
    max_width = 80
 | 
			
		||||
    max_help_position = 80
 | 
			
		||||
 | 
			
		||||
@@ -181,7 +193,7 @@ def parseOpts(overrideArguments=None):
 | 
			
		||||
 | 
			
		||||
    video_format.add_option('-f', '--format',
 | 
			
		||||
            action='store', dest='format', metavar='FORMAT',
 | 
			
		||||
            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
 | 
			
		||||
            help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
 | 
			
		||||
    video_format.add_option('--all-formats',
 | 
			
		||||
            action='store_const', dest='format', help='download all available video formats', const='all')
 | 
			
		||||
    video_format.add_option('--prefer-free-formats',
 | 
			
		||||
@@ -354,9 +366,9 @@ def parseOpts(overrideArguments=None):
 | 
			
		||||
        argv = systemConf + userConf + commandLineConf
 | 
			
		||||
        opts, args = parser.parse_args(argv)
 | 
			
		||||
        if opts.verbose:
 | 
			
		||||
            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
 | 
			
		||||
            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
 | 
			
		||||
            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
 | 
			
		||||
            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
 | 
			
		||||
            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
 | 
			
		||||
            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 | 
			
		||||
 | 
			
		||||
    return parser, opts, args
 | 
			
		||||
 | 
			
		||||
@@ -427,6 +439,10 @@ def _real_main(argv=None):
 | 
			
		||||
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 | 
			
		||||
    https_handler = make_HTTPS_handler(opts)
 | 
			
		||||
    opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
 | 
			
		||||
    # Delete the default user-agent header, which would otherwise apply in
 | 
			
		||||
    # cases where our custom HTTP handler doesn't come into play
 | 
			
		||||
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 | 
			
		||||
    opener.addheaders =[]
 | 
			
		||||
    compat_urllib_request.install_opener(opener)
 | 
			
		||||
    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 | 
			
		||||
 | 
			
		||||
@@ -604,7 +620,7 @@ def _real_main(argv=None):
 | 
			
		||||
                sys.exc_clear()
 | 
			
		||||
            except:
 | 
			
		||||
                pass
 | 
			
		||||
        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
 | 
			
		||||
        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
 | 
			
		||||
        sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 | 
			
		||||
 | 
			
		||||
    ydl.add_default_info_extractors()
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										202
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,202 @@
 | 
			
		||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
 | 
			
		||||
 | 
			
		||||
import base64
 | 
			
		||||
from math import ceil
 | 
			
		||||
 | 
			
		||||
from .utils import bytes_to_intlist, intlist_to_bytes
 | 
			
		||||
 | 
			
		||||
BLOCK_SIZE_BYTES = 16
 | 
			
		||||
 | 
			
		||||
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    Each counter block is encrypted with the expanded key and XORed onto the
    matching 16-Byte slice of 'data'.

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
                               returns the next counter block
    @returns {int[]}           decrypted data
    """
    round_keys = key_expansion(key)
    total_length = len(data)

    plain = []
    for offset in range(0, total_length, BLOCK_SIZE_BYTES):
        keystream_input = counter.next_value()
        chunk = data[offset:offset + BLOCK_SIZE_BYTES]
        # The final chunk may be short; pad it so a full block is produced
        chunk += [0] * (BLOCK_SIZE_BYTES - len(chunk))

        keystream = aes_encrypt(keystream_input, round_keys)
        plain += xor(chunk, keystream)

    # Drop the bytes that only exist because of the padding above
    return plain[:total_length]
 | 
			
		||||
 | 
			
		||||
def key_expansion(data):
    """
    Generate key schedule

    Every appended 4-Byte word is the XOR of a (possibly transformed) copy of
    the last word with the word one key length earlier in the schedule.

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    schedule = data[:]  # copy, the caller's key is left untouched
    key_length = len(schedule)
    target_length = (key_length // 4 + 7) * BLOCK_SIZE_BYTES
    # Number of plain words that close each pass for the longer key sizes
    closing_words = 3 if key_length == 32 else 2 if key_length == 24 else 0

    rcon_index = 1
    while len(schedule) < target_length:
        # First word of the pass goes through the key schedule core
        word = key_schedule_core(schedule[-4:], rcon_index)
        rcon_index += 1
        schedule += xor(word, schedule[-key_length:4 - key_length])

        for _ in range(3):
            schedule += xor(schedule[-4:], schedule[-key_length:4 - key_length])

        if key_length == 32:
            # 32-Byte keys substitute one extra word through the S-box
            word = sub_bytes(schedule[-4:])
            schedule += xor(word, schedule[-key_length:4 - key_length])

        for _ in range(closing_words):
            schedule += xor(schedule[-4:], schedule[-key_length:4 - key_length])

    # The last pass may overshoot the required size
    return schedule[:target_length]
 | 
			
		||||
 | 
			
		||||
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    # One round key is consumed before the first round
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    state = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for round_no in range(1, rounds + 1):
        state = shift_rows(sub_bytes(state))
        # The final round skips the column mixing
        if round_no < rounds:
            state = mix_columns(state)
        key_start = round_no * BLOCK_SIZE_BYTES
        state = xor(state, expanded_key[key_start:key_start + BLOCK_SIZE_BYTES])

    return state
 | 
			
		||||
 | 
			
		||||
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's);
      the resulting 16-Byte block is then extended with its own leading Bytes
      until it is 'key_size_bytes' long
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                       Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    # Truncate or zero-pad the password to exactly key_size_bytes
    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
    key_block = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key))
    # Extend the 16-Byte block to 16/24/32 Bytes. Multiplying the block by
    # key_size_bytes // 16 would leave a 24-Byte request with only 16 Bytes,
    # i.e. AES-128 instead of AES-192.
    key = key_block + key_block[:key_size_bytes - BLOCK_SIZE_BYTES]

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter:
        # Initial counter block: the nonce followed by zero Bytes
        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)

        def next_value(self):
            # Hand out the current block and advance the counter by one
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
 | 
			
		||||
 | 
			
		||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
 | 
			
		||||
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
 | 
			
		||||
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
 | 
			
		||||
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
 | 
			
		||||
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
 | 
			
		||||
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
 | 
			
		||||
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
 | 
			
		||||
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
 | 
			
		||||
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
 | 
			
		||||
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
 | 
			
		||||
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
 | 
			
		||||
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
 | 
			
		||||
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
 | 
			
		||||
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
 | 
			
		||||
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
 | 
			
		||||
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
 | 
			
		||||
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
 | 
			
		||||
MIX_COLUMN_MATRIX = ((2,3,1,1),
 | 
			
		||||
                     (1,2,3,1),
 | 
			
		||||
                     (1,1,2,3),
 | 
			
		||||
                     (3,1,1,2))
 | 
			
		||||
 | 
			
		||||
def sub_bytes(data):
    """Return a new list with every value of 'data' replaced by its SBOX entry."""
    return [SBOX[value] for value in data]
 | 
			
		||||
 | 
			
		||||
def rotate(data):
    """Return a copy of 'data' rotated left by one position."""
    tail = data[1:]
    return tail + [data[0]]
 | 
			
		||||
 | 
			
		||||
def key_schedule_core(data, rcon_iteration):
    """
    Transform one word for the key schedule: rotate it, substitute it through
    the S-box and XOR RCON[rcon_iteration] onto its first Byte.
    """
    word = sub_bytes(rotate(data))
    word[0] ^= RCON[rcon_iteration]
    return word
 | 
			
		||||
 | 
			
		||||
def xor(data1, data2):
    """XOR two int lists element-wise; the result is as long as the shorter input."""
    return [left ^ right for left, right in zip(data1, data2)]
 | 
			
		||||
 | 
			
		||||
def mix_column(data):
    """
    Multiply one 4-Byte column with MIX_COLUMN_MATRIX.

    Products are XORed together; a doubling that overflows one Byte is
    reduced by XORing 0x1b.
    """
    mixed_column = []
    for coefficients in MIX_COLUMN_MATRIX:
        accumulator = 0
        for position in range(4):
            byte = data[position]
            factor = coefficients[position]
            term = byte
            if factor in (2, 3):
                # Multiply by 2, reducing when the result leaves the Byte range
                term = byte << 1
                if term > 0xff:
                    term = (term & 0xff) ^ 0x1b
                # Multiplying by 3 is doubling plus the value itself
                if factor == 3:
                    term ^= byte
            accumulator ^= term & 0xff
        mixed_column.append(accumulator)
    return mixed_column
 | 
			
		||||
 | 
			
		||||
def mix_columns(data):
    """Apply mix_column to each of the four consecutive 4-Byte slices of 'data'."""
    mixed_state = []
    for start in range(0, 16, 4):
        mixed_state += mix_column(data[start:start + 4])
    return mixed_state
 | 
			
		||||
 | 
			
		||||
def shift_rows(data):
    """
    Return the 16-Byte state with row r (the Bytes at index c*4 + r) moved
    r columns to the left, wrapping around.
    """
    return [
        data[((column + row) % 4) * 4 + row]
        for column in range(4)
        for row in range(4)
    ]
 | 
			
		||||
 | 
			
		||||
def inc(data):
    """
    Return a copy of 'data' incremented by one, treating the list as a
    big-endian number made of Bytes (an all-255 list wraps to all zeros).
    """
    counter = data[:]  # copy
    for position in reversed(range(len(counter))):
        if counter[position] != 255:
            counter[position] += 1
            break
        # Byte overflows: reset it and carry into the next higher Byte
        counter[position] = 0
    return counter
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
from .appletrailers import AppleTrailersIE
 | 
			
		||||
from .addanime import AddAnimeIE
 | 
			
		||||
from .archiveorg import ArchiveOrgIE
 | 
			
		||||
from .ard import ARDIE
 | 
			
		||||
from .arte import ArteTvIE
 | 
			
		||||
@@ -6,16 +8,21 @@ from .bandcamp import BandcampIE
 | 
			
		||||
from .bliptv import BlipTVIE, BlipTVUserIE
 | 
			
		||||
from .breakcom import BreakIE
 | 
			
		||||
from .brightcove import BrightcoveIE
 | 
			
		||||
from .c56 import C56IE
 | 
			
		||||
from .canalplus import CanalplusIE
 | 
			
		||||
from .canalc2 import Canalc2IE
 | 
			
		||||
from .cnn import CNNIE
 | 
			
		||||
from .collegehumor import CollegeHumorIE
 | 
			
		||||
from .comedycentral import ComedyCentralIE
 | 
			
		||||
from .condenast import CondeNastIE
 | 
			
		||||
from .criterion import CriterionIE
 | 
			
		||||
from .cspan import CSpanIE
 | 
			
		||||
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
 | 
			
		||||
from .daum import DaumIE
 | 
			
		||||
from .depositfiles import DepositFilesIE
 | 
			
		||||
from .dotsub import DotsubIE
 | 
			
		||||
from .dreisat import DreiSatIE
 | 
			
		||||
from .defense import DefenseGouvFrIE
 | 
			
		||||
from .ehow import EHowIE
 | 
			
		||||
from .eighttracks import EightTracksIE
 | 
			
		||||
from .escapist import EscapistIE
 | 
			
		||||
@@ -29,6 +36,7 @@ from .gametrailers import GametrailersIE
 | 
			
		||||
from .generic import GenericIE
 | 
			
		||||
from .googleplus import GooglePlusIE
 | 
			
		||||
from .googlesearch import GoogleSearchIE
 | 
			
		||||
from .hark import HarkIE
 | 
			
		||||
from .hotnewhiphop import HotNewHipHopIE
 | 
			
		||||
from .howcast import HowcastIE
 | 
			
		||||
from .hypem import HypemIE
 | 
			
		||||
@@ -44,23 +52,30 @@ from .keek import KeekIE
 | 
			
		||||
from .liveleak import LiveLeakIE
 | 
			
		||||
from .livestream import LivestreamIE
 | 
			
		||||
from .metacafe import MetacafeIE
 | 
			
		||||
from .metacritic import MetacriticIE
 | 
			
		||||
from .mit import TechTVMITIE, MITIE
 | 
			
		||||
from .mixcloud import MixcloudIE
 | 
			
		||||
from .mtv import MTVIE
 | 
			
		||||
from .muzu import MuzuTVIE
 | 
			
		||||
from .myspass import MySpassIE
 | 
			
		||||
from .myvideo import MyVideoIE
 | 
			
		||||
from .naver import NaverIE
 | 
			
		||||
from .nba import NBAIE
 | 
			
		||||
from .nbc import NBCNewsIE
 | 
			
		||||
from .ooyala import OoyalaIE
 | 
			
		||||
from .orf import ORFIE
 | 
			
		||||
from .pbs import PBSIE
 | 
			
		||||
from .photobucket import PhotobucketIE
 | 
			
		||||
from .pornotube import PornotubeIE
 | 
			
		||||
from .rbmaradio import RBMARadioIE
 | 
			
		||||
from .redtube import RedTubeIE
 | 
			
		||||
from .ringtv import RingTVIE
 | 
			
		||||
from .ro220 import Ro220IE
 | 
			
		||||
from .roxwel import RoxwelIE
 | 
			
		||||
from .rtlnow import RTLnowIE
 | 
			
		||||
from .sina import SinaIE
 | 
			
		||||
from .slashdot import SlashdotIE
 | 
			
		||||
from .sohu import SohuIE
 | 
			
		||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
 | 
			
		||||
from .spiegel import SpiegelIE
 | 
			
		||||
from .stanfordoc import StanfordOpenClassroomIE
 | 
			
		||||
@@ -71,18 +86,19 @@ from .ted import TEDIE
 | 
			
		||||
from .tf1 import TF1IE
 | 
			
		||||
from .thisav import ThisAVIE
 | 
			
		||||
from .traileraddict import TrailerAddictIE
 | 
			
		||||
from .trilulilu import TriluliluIE
 | 
			
		||||
from .tudou import TudouIE
 | 
			
		||||
from .tumblr import TumblrIE
 | 
			
		||||
from .tutv import TutvIE
 | 
			
		||||
from .ustream import UstreamIE
 | 
			
		||||
from .unistra import UnistraIE
 | 
			
		||||
from .ustream import UstreamIE
 | 
			
		||||
from .vbox7 import Vbox7IE
 | 
			
		||||
from .veehd import VeeHDIE
 | 
			
		||||
from .veoh import VeohIE
 | 
			
		||||
from .vevo import VevoIE
 | 
			
		||||
from .videofyme import VideofyMeIE
 | 
			
		||||
from .vimeo import VimeoIE, VimeoChannelIE
 | 
			
		||||
from .vine import VineIE
 | 
			
		||||
from .c56 import C56IE
 | 
			
		||||
from .wat import WatIE
 | 
			
		||||
from .weibo import WeiboIE
 | 
			
		||||
from .wimp import WimpIE
 | 
			
		||||
@@ -116,12 +132,14 @@ _ALL_CLASSES = [
 | 
			
		||||
]
 | 
			
		||||
_ALL_CLASSES.append(GenericIE)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def gen_extractors():
    """Return a list holding one new instance of every class in _ALL_CLASSES.

    The order does matter; the first extractor matched is the one handling the URL.
    """
    return [ie_class() for ie_class in _ALL_CLASSES]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_info_extractor(ie_name):
    """Returns the info extractor class with the given ie_name

    The class is looked up in this module's globals under ie_name + 'IE';
    an unknown name raises KeyError.
    """
    class_name = ie_name + 'IE'
    return globals()[class_name]
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										75
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/addanime.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,75 @@
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_HTTPError,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_parse_urlparse,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AddAnimeIE(InfoExtractor):
    # Extractor for add-anime.net watch pages. When the first request fails
    # with an HTTP error, the error page's challenge form is answered and
    # the page is requested again.

    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, url, ext, title, description) for url."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')

        try:
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            # Only HTTP errors carry a challenge page; anything else is fatal
            if not isinstance(ee.cause, compat_HTTPError):
                raise

            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            # The page embeds a small arithmetic task of the form a+b*c
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if av is None:
                raise ExtractorError(u'Cannot find redirect math task')
            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))

            # The expected answer is the task result plus the host name length
            parsed_url = compat_urllib_parse_urlparse(url)
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            # Retry the original page now that the challenge was answered
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
            'id':  video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
 | 
			
		||||
							
								
								
									
										166
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,166 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    determine_ext,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AppleTrailersIE(InfoExtractor):
    # Extractor for trailers.apple.com movie pages; every trailer listed for
    # the movie becomes one entry of the returned playlist.
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        """Return a playlist dict whose entries are the trailers found for url."""
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Drop <script> blocks and wrap the snippet in a single root element
        # before handing it to the XML parser
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        # Parsed size pages keyed by size id, so each is downloaded only once
        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            date_el = li.find('.//p')
            upload_date = None
            # The matched text holds a two-digit year; it is prefixed with '20'
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            # The runtime is read from the text following the <br> inside the <p>
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                if formats_el.attrib['class'] != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
                    size_cache[size_id] = _doc

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    # Only the link belonging to this trailer is of interest
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q

                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    assert movie_link_el.get('class') == 'movieLink'
                    # NOTE(review): the '_' -> '_h' rewrite presumably selects the
                    # directly downloadable file variant - confirm against the site
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov'

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
 | 
			
		||||
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
    _TEST ={
 | 
			
		||||
        u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
 | 
			
		||||
        u'file': u'93440716.mp4',
 | 
			
		||||
        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
 | 
			
		||||
        u'file': u'93440716.flv',
 | 
			
		||||
        u'md5': u'e59995ac63d0457783ea05f93f12a866',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'网事知多少 第32期:车怒',
 | 
			
		||||
        },
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Canalc2IE(InfoExtractor):
    """Information extractor for videos published on canalc2.tv."""

    # The extractor name is exposed through ``IE_NAME`` (the base class
    # defines it as a property); an attribute called ``_IE_NAME`` is not
    # looked up, so the custom name would be silently ignored.
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, ext, url, title) for a canalc2.tv page.

        The numeric id comes from the ``idVideo`` query parameter; the media
        file name and the title are scraped from the downloaded page.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        # The flash player is configured with so.addVariable('file', ...);
        # that value is the path of the media file on the streaming host.
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
        }
 | 
			
		||||
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 | 
			
		||||
from ..utils import unified_strdate
 | 
			
		||||
 | 
			
		||||
class CanalplusIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
 | 
			
		||||
    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
 | 
			
		||||
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
 | 
			
		||||
    IE_NAME = u'canalplus.fr'
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/cnn.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CNNIE(InfoExtractor):
    """Information extractor for cnn.com / edition.cnn.com video pages."""

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for a CNN video URL.

        The video metadata is read from the ``index.xml`` document that
        corresponds to the path found in the URL.  Among the listed files
        the one that sorts last by (width, height, bitrate) is selected.
        """
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        formats = []
        for f in info.findall('files/file'):
            # Only entries shaped like "<width>x<height>[_<rate>k]" are
            # usable.  The rate group accepts digits only so that int()
            # below cannot fail, and entries without a bitrate attribute
            # are skipped instead of raising KeyError.
            mf = re.match(r'(\d+)x(\d+)(?:_(\d+)k)?', f.attrib.get('bitrate', ''))
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        # Tuples sort by width, then height, then bitrate: best one is last.
        formats = sorted(formats)
        (_, _, _, video_path) = formats[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        # Thumbnails are ordered by (height, width), largest last.
        thumbnails = sorted(
            ((int(t.attrib['height']), int(t.attrib['width'])), t.text)
            for t in info.findall('images/image'))
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]

        return {
            'id': info.attrib['id'],
            'title': info.find('headline').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            # A feed without any image must not abort the extraction.
            'thumbnail': thumbnails[-1][1] if thumbnails else None,
            'thumbnails': thumbs_dict,
            'description': info.find('description').text,
        }
 | 
			
		||||
@@ -114,6 +114,11 @@ class InfoExtractor(object):
 | 
			
		||||
        """Real extraction process. Redefine in subclasses."""
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def ie_key(cls):
 | 
			
		||||
        """A string for getting the InfoExtractor with get_info_extractor"""
 | 
			
		||||
        return cls.__name__[:-2]
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def IE_NAME(self):
 | 
			
		||||
        return type(self).__name__[:-2]
 | 
			
		||||
@@ -129,7 +134,7 @@ class InfoExtractor(object):
 | 
			
		||||
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 | 
			
		||||
            if errnote is None:
 | 
			
		||||
                errnote = u'Unable to download webpage'
 | 
			
		||||
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
 | 
			
		||||
            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 | 
			
		||||
 | 
			
		||||
    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
 | 
			
		||||
        """ Returns a tuple (page content as string, URL handle) """
 | 
			
		||||
@@ -140,12 +145,17 @@ class InfoExtractor(object):
 | 
			
		||||
 | 
			
		||||
        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
 | 
			
		||||
        content_type = urlh.headers.get('Content-Type', '')
 | 
			
		||||
        webpage_bytes = urlh.read()
 | 
			
		||||
        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
 | 
			
		||||
        if m:
 | 
			
		||||
            encoding = m.group(1)
 | 
			
		||||
        else:
 | 
			
		||||
            encoding = 'utf-8'
 | 
			
		||||
        webpage_bytes = urlh.read()
 | 
			
		||||
            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
 | 
			
		||||
                          webpage_bytes[:1024])
 | 
			
		||||
            if m:
 | 
			
		||||
                encoding = m.group(1).decode('ascii')
 | 
			
		||||
            else:
 | 
			
		||||
                encoding = 'utf-8'
 | 
			
		||||
        if self._downloader.params.get('dump_intermediate_pages', False):
 | 
			
		||||
            try:
 | 
			
		||||
                url = url_or_request.get_full_url()
 | 
			
		||||
 
 | 
			
		||||
@@ -37,14 +37,14 @@ class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
 | 
			
		||||
class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
 | 
			
		||||
    """Information Extractor for Dailymotion"""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
 | 
			
		||||
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
 | 
			
		||||
    IE_NAME = u'dailymotion'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
 | 
			
		||||
        u'file': u'x33vw9.mp4',
 | 
			
		||||
        u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"uploader": u"Alex and Van .", 
 | 
			
		||||
            u"uploader": u"Amphora Alex and Van .", 
 | 
			
		||||
            u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
@@ -56,6 +56,7 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
 | 
			
		||||
        video_id = mobj.group(1).split('_')[0].split('?')[0]
 | 
			
		||||
 | 
			
		||||
        video_extension = 'mp4'
 | 
			
		||||
        url = 'http://www.dailymotion.com/video/%s' % video_id
 | 
			
		||||
 | 
			
		||||
        # Retrieve video webpage to extract further information
 | 
			
		||||
        request = compat_urllib_request.Request(url)
 | 
			
		||||
@@ -78,7 +79,8 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
 | 
			
		||||
        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
 | 
			
		||||
        embed_page = self._download_webpage(embed_url, video_id,
 | 
			
		||||
                                            u'Downloading embed page')
 | 
			
		||||
        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
 | 
			
		||||
        info = self._search_regex(r'var info = ({.*?}),$', embed_page,
 | 
			
		||||
            'video info', flags=re.MULTILINE)
 | 
			
		||||
        info = json.loads(info)
 | 
			
		||||
 | 
			
		||||
        # TODO: support choosing qualities
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/daum.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/daum.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,74 @@
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DaumIE(InfoExtractor):
    """Information extractor for clips hosted on tvpot.daum.net."""

    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for a tvpot.daum.net clip URL.

        The canonical clip page yields the "full id", which is then used to
        query the clip info XML, the list of available profiles and, for
        each profile, the location of the media file.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(
            'http://tvpot.daum.net/v/%s' % video_id, video_id)
        full_id = self._search_regex(
            r'<link rel="video_src" href=".+?vid=(.+?)"',
            webpage, u'full id')

        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        # One extra request per profile is needed to learn the media URL.
        for output_el in urls_doc.findall('result/output_list/output_list'):
            profile = output_el.attrib['profile']
            location_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            location_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + location_query,
                video_id, note=False)
            location_doc = xml.etree.ElementTree.fromstring(
                location_xml.encode('utf-8'))
            media_url = location_doc.find('result/url').text
            formats.append({
                'url': media_url,
                'ext': determine_ext(media_url),
                'format_id': profile,
            })

        result = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info_doc.find('CONTENTS').text,
            'duration': int(info_doc.find('DURATION').text),
            # Only the first eight characters of REGDTTM are kept.
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        # Until then the last listed format is copied to the top level.
        result.update(formats[-1])
        return result
 | 
			
		||||
							
								
								
									
										39
									
								
								youtube_dl/extractor/defense.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								youtube_dl/extractor/defense.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DefenseGouvFrIE(InfoExtractor):
    """Information extractor for the web TV of defense.gouv.fr."""

    # The extractor name is exposed through ``IE_NAME`` (the base class
    # defines it as a property); an attribute called ``_IE_NAME`` is not
    # looked up, so the custom name would be silently ignored.
    IE_NAME = 'defense.gouv.fr'
    # "ligthboxvideo" (sic) is the spelling used in the site's URLs, see
    # the URL in _TEST.
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
        r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
        u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, ext, url, title) for a web TV page.

        The title is the last part of the page URL.  The numeric video id
        is scraped from the page and used to fetch a JSON description whose
        first rendition provides the media URL.
        """
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        # The dot is escaped so that only the literal "flashvars.pvg_id"
        # assignment is matched.
        video_id = self._search_regex(
            r"flashvars\.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
            + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        video_url = json.loads(info)['renditions'][0]['url']

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
        }
 | 
			
		||||
@@ -8,11 +8,13 @@ from ..utils import (
 | 
			
		||||
    compat_urllib_error,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
from .brightcove import BrightcoveIE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class GenericIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = u'Generic downloader that works on some sites'
 | 
			
		||||
    _VALID_URL = r'.*'
 | 
			
		||||
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            u'file': u'13601338388002.mp4',
 | 
			
		||||
            u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
 | 
			
		||||
            u'info_dict': {
 | 
			
		||||
                u"uploader": u"www.hodiho.fr", 
 | 
			
		||||
                u"uploader": u"www.hodiho.fr",
 | 
			
		||||
                u"title": u"R\u00e9gis plante sa Jeep"
 | 
			
		||||
            }
 | 
			
		||||
        },
 | 
			
		||||
@@ -107,6 +109,11 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        return new_url
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        parsed_url = compat_urlparse.urlparse(url)
 | 
			
		||||
        if not parsed_url.scheme:
 | 
			
		||||
            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
 | 
			
		||||
            return self.url_result('http://' + url)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            new_url = self._test_redirect(url)
 | 
			
		||||
            if new_url:
 | 
			
		||||
@@ -124,7 +131,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        # Look for BrigthCove:
 | 
			
		||||
        # Look for BrightCove:
 | 
			
		||||
        m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
 | 
			
		||||
        if m_brightcove is not None:
 | 
			
		||||
            self.to_screen(u'Brightcove video detected.')
 | 
			
		||||
@@ -151,7 +158,7 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
                mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            # HTML5 video
 | 
			
		||||
            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
 | 
			
		||||
            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
@@ -160,8 +167,9 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
        if mobj.group(1) is None:
 | 
			
		||||
            raise ExtractorError(u'Invalid URL: %s' % url)
 | 
			
		||||
 | 
			
		||||
        video_url = compat_urllib_parse.unquote(mobj.group(1))
 | 
			
		||||
        video_id = os.path.basename(video_url)
 | 
			
		||||
        video_url = mobj.group(1)
 | 
			
		||||
        video_url = compat_urlparse.urljoin(url, video_url)
 | 
			
		||||
        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))
 | 
			
		||||
 | 
			
		||||
        # here's a fun little line of code for you:
 | 
			
		||||
        video_extension = os.path.splitext(video_id)[1][1:]
 | 
			
		||||
 
 | 
			
		||||
@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
 | 
			
		||||
            webpage, 'title', default=u'NA')
 | 
			
		||||
 | 
			
		||||
        # Step 2, Simulate clicking the image box to launch video
 | 
			
		||||
        DOMAIN = 'https://plus.google.com'
 | 
			
		||||
        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
 | 
			
		||||
        DOMAIN = 'https://plus.google.com/'
 | 
			
		||||
        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
 | 
			
		||||
            webpage, u'video page URL')
 | 
			
		||||
        if not video_page.startswith(DOMAIN):
 | 
			
		||||
            video_page = DOMAIN + video_page
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										37
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/hark.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,37 @@
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import determine_ext
 | 
			
		||||
 | 
			
		||||
class HarkIE(InfoExtractor):
    """Information extractor for audio clips on hark.com."""

    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            u'duration': 11,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for a hark.com clip URL.

        The clip id is the part of the URL slug before the first dash; the
        remaining fields are read from the clip's JSON description.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        clip = json.loads(self._download_webpage(
            "http://www.hark.com/clips/%s.json" % video_id, video_id))
        media_url = clip['url']

        return {
            'id': video_id,
            'url': media_url,
            'title': clip['name'],
            'ext': determine_ext(media_url),
            'description': clip['description'],
            'thumbnail': clip['image_original'],
            'duration': clip['duration'],
        }
 | 
			
		||||
@@ -13,7 +13,7 @@ class IGNIE(InfoExtractor):
 | 
			
		||||
    Some videos of it.ign.com are also supported
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
 | 
			
		||||
    _VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles)(/.+)?/(?P<name_or_id>.+)'
 | 
			
		||||
    IE_NAME = u'ign.com'
 | 
			
		||||
 | 
			
		||||
    _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
 | 
			
		||||
@@ -41,7 +41,11 @@ class IGNIE(InfoExtractor):
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
        name_or_id = mobj.group('name_or_id')
 | 
			
		||||
        page_type = mobj.group('type')
 | 
			
		||||
        webpage = self._download_webpage(url, name_or_id)
 | 
			
		||||
        if page_type == 'articles':
 | 
			
		||||
            video_url = self._search_regex(r'var videoUrl = "(.+?)"', webpage, u'video url')
 | 
			
		||||
            return self.url_result(video_url, ie='IGN')
 | 
			
		||||
        video_id = self._find_video_id(webpage)
 | 
			
		||||
        result = self._get_video_info(video_id)
 | 
			
		||||
        description = self._html_search_regex(self._DESCRIPTION_RE,
 | 
			
		||||
@@ -68,7 +72,7 @@ class IGNIE(InfoExtractor):
 | 
			
		||||
class OneUPIE(IGNIE):
 | 
			
		||||
    """Extractor for 1up.com, it uses the ign videos system."""
 | 
			
		||||
 | 
			
		||||
    _VALID_URL = r'https?://gamevideos.1up.com/video/id/(?P<name_or_id>.+)'
 | 
			
		||||
    _VALID_URL = r'https?://gamevideos.1up.com/(?P<type>video)/id/(?P<name_or_id>.+)'
 | 
			
		||||
    IE_NAME = '1up.com'
 | 
			
		||||
 | 
			
		||||
    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
 | 
			
		||||
 
 | 
			
		||||
@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
 | 
			
		||||
        video_id = mobj.group('id')
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
 | 
			
		||||
        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
 | 
			
		||||
        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
 | 
			
		||||
        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
 | 
			
		||||
        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
 | 
			
		||||
        gcid = gcids[-1]
 | 
			
		||||
 | 
			
		||||
        video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
 | 
			
		||||
                                                 video_id, u'Downloading video url info')
 | 
			
		||||
 
 | 
			
		||||
@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
 | 
			
		||||
        video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, u'title')
 | 
			
		||||
        description = self._og_search_description(webpage)
 | 
			
		||||
        video_uploader = self._html_search_regex(
 | 
			
		||||
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
 | 
			
		||||
                r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
 | 
			
		||||
                webpage, u'uploader nickname', fatal=False)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/metacritic.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,55 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
import operator
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MetacriticIE(InfoExtractor):
    """Information extractor for trailers on metacritic.com."""

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for a metacritic trailer URL.

        The clip whose id equals the one in the URL is looked up in the
        ``video_data`` XML; its video files are sorted by rate so that the
        highest rate comes last.  The description is scraped from the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        info_xml = self._download_webpage(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml')
        # The xml is not well formed, there are raw '&'.  Escape every
        # ampersand that does not already start a predefined entity or a
        # character reference, otherwise the parser rejects the document.
        info_xml = re.sub(
            r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)',
            '&amp;', info_xml)
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        clip = next(c for c in info_doc.findall('playList/clip')
                    if c.find('id').text == video_id)
        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            rate_str = video_file.find('rate').text
            video_url = video_file.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        # Ascending rate: the best quality ends up last.
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        info = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        # Until then the last (highest rate) format is copied to the top level.
        info.update(formats[-1])
        return info
 | 
			
		||||
							
								
								
									
										74
									
								
								youtube_dl/extractor/mit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								youtube_dl/extractor/mit.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,74 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    get_element_by_id,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TechTVMITIE(InfoExtractor):
    """Information extractor for videos hosted on techtv.mit.edu."""

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        The URL offered is the entry with the highest ``bitrate`` value
        found in the page's ``bitrates`` list.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        # Both /videos/ and /embeds/ URLs are resolved through the
        # regular video page.
        page_url = 'http://techtv.mit.edu/videos/%s' % video_id
        raw_page = self._download_webpage(page_url, video_id)
        # The element lookups below run on a copy without HTML comments.
        comment_re = re.compile(u'<!--.*?-->', re.S)
        clean_page = comment_re.sub(u'', raw_page)

        base_url = self._search_regex(
            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
        bitrates = json.loads(self._search_regex(
            r'bitrates: (\[.+?\])', raw_page, u'video formats'))
        # Ascending order, so the last entry carries the highest bitrate.
        bitrates.sort(key=lambda entry: entry['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description = clean_html(
            get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(
            r'playlist:.*?url: \'(.+?)\'', raw_page, u'thumbnail',
            flags=re.DOTALL)

        # The entry's url carries an 'mp4:' marker that is removed before
        # it is appended to the base url.
        best_path = bitrates[-1]['url'].replace('mp4:', '')
        return {
            'id': video_id,
            'title': title,
            'url': base_url + best_path,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MITIE(TechTVMITIE):
    """Information extractor for video.mit.edu watch pages.

    The watch page is only used to locate the first ``<iframe>``; its
    ``src`` is handed over to the TechTVMIT extractor.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a url result pointing at the embedded player's URL."""
        # The URL slug doubles as the identifier shown in progress output.
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        status = '%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)
        self.to_screen(status)

        embed_url = self._search_regex(
            r'<iframe .*?src="(.+?)"', webpage, u'embed url')
        return self.url_result(embed_url, ie='TechTVMIT')
 | 
			
		||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/naver.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/naver.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NaverIE(InfoExtractor):
    """Information extractor for tvcast.naver.com videos."""

    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        Raises ExtractorError when the player parameters cannot be found
        in the page, or when no non-rtmp encoding option is offered.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The player constructor call in the page carries the two values
        # that both service API requests below need.
        m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)

        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls_doc.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            # rtmp sources are skipped; only the remaining options are offered.
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })

        # Without this guard an all-rtmp (or empty) option list would
        # surface as a bare IndexError from formats[-1] below.
        if not formats:
            raise ExtractorError(u'couldn\'t find any non-rtmp video formats')

        info = {
            'id': video_id,
            'title': info_doc.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': info_doc.find('WriteDate').text.replace('.', ''),
            'view_count': int(info_doc.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
 | 
			
		||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/nbc.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,33 @@
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import find_xpath_attr, compat_str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NBCNewsIE(InfoExtractor):
    """Information extractor for nbcnews.com video pages."""

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Return the info dict built from the video's XML description."""
        video_id = re.match(self._VALID_URL, url).group('id')
        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        info_xml = self._download_webpage(info_url, video_id)
        root = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        video_el = root.find('video')

        # The <media> children are told apart by their "type" attribute.
        title = video_el.find('headline').text
        video_url = find_xpath_attr(video_el, 'media', 'type', 'flashVideo').text
        description = compat_str(video_el.find('caption').text)
        thumbnail = find_xpath_attr(video_el, 'media', 'type', 'thumbnail').text

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }
 | 
			
		||||
							
								
								
									
										54
									
								
								youtube_dl/extractor/orf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								youtube_dl/extractor/orf.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,54 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class ORFIE(InfoExtractor):
    """Information extractor for tvthek.orf.at episode and topic pages."""

    # The host dots are escaped so that only tvthek.orf.at itself matches.
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return a list with one video info dict per playlist entry.

        Raises ExtractorError when an entry offers none of the known
        quality levels.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        flash_xml = self._search_regex('ORF.flashXML = \'(.+?)\'', webpage, u'flash xml')
        # Routed through parse_qs to undo the percent-encoding of the value.
        flash_xml = compat_urlparse.parse_qs('xml='+flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
        # The playlist is embedded as a quoted JSON string with escaped
        # double quotes, which are restored before parsing.
        playlist_json = self._search_regex(r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"','"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        webpage_description = self._og_search_description(webpage)
        # XML items and JSON entries are paired by position; i is 1-based
        # because it is used in the "playlist_entry_<i>" element id below.
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url: the first level found in this order wins
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            # Falls back to the page-wide description when the entry has
            # no paragraph of its own.
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
                })

        return videos
 | 
			
		||||
							
								
								
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								youtube_dl/extractor/ro220.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    compat_parse_qs,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Ro220IE(InfoExtractor):
    """Information extractor for 220.ro videos."""

    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Return the info dict read from the page's flashVars parameter."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)
        # All metadata is carried in the query-string style flashVars value.
        flash_vars = compat_parse_qs(self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars'))

        def first(name):
            # compat_parse_qs maps each key to a list; take its first value.
            return flash_vars[name][0]

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': first('videoURL'),
            'title': first('title'),
            'description': clean_html(first('desc')),
            'thumbnail': first('preview'),
        }
 | 
			
		||||
@@ -8,8 +8,8 @@ from ..utils import (
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class RTLnowIE(InfoExtractor):
 | 
			
		||||
    """Information Extractor for RTLnow, RTL2now and VOXnow"""
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
 | 
			
		||||
    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
 | 
			
		||||
    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
 | 
			
		||||
        u'file': u'90419.flv',
 | 
			
		||||
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
 | 
			
		||||
        u'params': {
 | 
			
		||||
            u'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
 | 
			
		||||
        u'file': u'99205.flv',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'upload_date': u'20080928', 
 | 
			
		||||
            u'title': u'Medicopter 117 - Angst!',
 | 
			
		||||
            u'description': u'Angst!',
 | 
			
		||||
            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
 | 
			
		||||
        },
 | 
			
		||||
        u'params': {
 | 
			
		||||
            u'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self,url):
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										90
									
								
								youtube_dl/extractor/sohu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								youtube_dl/extractor/sohu.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,90 @@
 | 
			
		||||
# encoding: utf-8
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import ExtractorError
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SohuIE(InfoExtractor):
    """Information extractor for tv.sohu.com videos.

    A video may be split into several parts; a single part is returned
    as a plain video, several parts as a playlist.
    """

    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'

    _TEST = {
        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
        u'file': u'382479172.mp4',
        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
        u'info_dict': {
            u'title': u'MV:Far East Movement《The Illest》',
        },
    }

    def _real_extract(self, url):
        """Return a video info dict or a playlist of the video's parts.

        Raises ExtractorError when none of the known quality ids is set.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        def _fetch_data(vid_id):
            """Download and parse the JSON description for ``vid_id``."""
            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
            data_url = base_data_url + str(vid_id)
            data_json = self._download_webpage(
                data_url, video_id,
                note=u'Downloading JSON data for ' + str(vid_id))
            return json.loads(data_json)

        webpage = self._download_webpage(url, video_id)
        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
                                            webpage, u'video title')
        # Only the text before the first '-' of the page title is kept.
        title = raw_title.partition('-')[0].strip()

        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
                                      u'video path')
        data = _fetch_data(vid)

        QUALITIES = ('ori', 'super', 'high', 'nor')
        # A value of 0 marks a quality that is not available.
        vid_ids = [data['data'][q + 'Vid']
                   for q in QUALITIES
                   if data['data'][q + 'Vid'] != 0]
        if not vid_ids:
            raise ExtractorError(u'No formats available for this video')

        # For now, we just pick the highest available quality
        # NOTE(review): this takes the LAST available entry of QUALITIES
        # ('nor' when present), which looks like the lowest quality rather
        # than the highest. Left unchanged because the md5 in _TEST depends
        # on the current selection -- confirm before switching to vid_ids[0].
        vid_id = vid_ids[-1]

        # vid is a string taken from the page while vid_id comes from JSON
        # (compared against 0 above), so compare them as strings; otherwise
        # the data already downloaded is never reused.
        if str(vid_id) == vid:
            format_data = data
        else:
            format_data = _fetch_data(vid_id)
        part_count = format_data['data']['totalBlocks']
        allot = format_data['allot']
        prot = format_data['prot']
        clipsURL = format_data['data']['clipsURL']
        su = format_data['data']['su']

        playlist = []
        for i in range(part_count):
            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
                        (allot, prot, clipsURL[i], su[i]))
            part_str = self._download_webpage(
                part_url, video_id,
                note=u'Downloading part %d of %d' % (i+1, part_count))

            # The response is a '|'-separated record; field 0 is used as
            # the URL prefix and field 3 as the key parameter.
            part_info = part_str.split('|')
            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])

            video_info = {
                'id': '%s_part%02d' % (video_id, i + 1),
                'title': title,
                'url': video_url,
                'ext': 'mp4',
            }
            playlist.append(video_info)

        if len(playlist) == 1:
            # A single part is reported under the plain video id.
            info = playlist[0]
            info['id'] = video_id
        else:
            info = {
                '_type': 'playlist',
                'entries': playlist,
                'id': video_id,
            }

        return info
 | 
			
		||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/trilulilu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/trilulilu.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TriluliluIE(InfoExtractor):
    """Information extractor for trilulilu.ro video pages."""

    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return the info dict, including every format the server lists."""
        video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        # block_flash_vars is a JSON object whose server, hash and userid
        # entries are interpolated into the URLs below.
        log = json.loads(self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info'))

        format_url_template = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                               u'video-formats2')
        format_str = self._download_webpage(
            format_url_template % log, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
        format_doc = xml.etree.ElementTree.fromstring(format_str)

        # The doubled %% leaves a single %s placeholder behind, which is
        # filled with each format name in the loop below.
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' %
            log)

        formats = []
        for fnode in format_doc.findall('./formats/format'):
            format_name = fnode.text
            formats.append({
                'format': format_name,
                'url': video_url_template % format_name,
            })

        info = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

        # TODO: Remove when #980 has been merged
        chosen = formats[-1]
        info['url'] = chosen['url']
        # The extension is the part of the format name before the first '-'.
        info['ext'] = chosen['format'].partition('-')[0]

        return info
 | 
			
		||||
@@ -11,7 +11,7 @@ class UnistraIE(InfoExtractor):
 | 
			
		||||
        u'md5': u'736f605cfdc96724d55bb543ab3ced24',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u'title': u'M!ss Yella',
 | 
			
		||||
            u'description': u'md5:75e8439a3e2981cd5d4b6db232e8fdfc',
 | 
			
		||||
            u'description': u'md5:104892c71bd48e55d70b902736b81bbf',
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/veehd.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/veehd.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,56 @@
 | 
			
		||||
import re
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    get_element_by_id,
 | 
			
		||||
    clean_html,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class VeeHDIE(InfoExtractor):
    """Information extractor for veehd.com videos."""

    # The host dot is escaped so that only veehd.com itself matches.
    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the video referenced by ``url``.

        The media URL is read from the player page's config; the remaining
        metadata is read from the video page itself.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # The player iframe's src is set from a script in the video page.
        player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, u'player path')
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(player_url, video_id,
            u'Downloading player page')
        config_json = self._search_regex(r'value=\'config=({.+?})\'',
            player_page, u'config json')
        config = json.loads(config_json)

        # The clip url is percent-encoded inside the config.
        video_url = compat_urlparse.unquote(config['clip']['url'])
        # Only the text before the last '|' of the videoName element is kept.
        title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
            webpage, u'uploader')
        thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
            webpage, u'thumbnail')
        description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
 | 
			
		||||
@@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
                u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
            u'url': u'http://player.vimeo.com/video/54469442',
 | 
			
		||||
            u'file': u'54469442.mp4',
 | 
			
		||||
            u'md5': u'619b811a4417aa4abe78dc653becf511',
 | 
			
		||||
            u'note': u'Videos that embed the url in the player page',
 | 
			
		||||
            u'info_dict': {
 | 
			
		||||
                u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
 | 
			
		||||
                u'uploader': u'The BLN & Business of Software',
 | 
			
		||||
            },
 | 
			
		||||
        },
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    def _login(self):
 | 
			
		||||
@@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Extract the config JSON
 | 
			
		||||
        try:
 | 
			
		||||
            config = webpage.split(' = {config:')[1].split(',assets:')[0]
 | 
			
		||||
            config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
 | 
			
		||||
                webpage, u'info section', flags=re.DOTALL)
 | 
			
		||||
            config = json.loads(config)
 | 
			
		||||
        except:
 | 
			
		||||
            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
 | 
			
		||||
@@ -132,12 +143,22 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
 | 
			
		||||
 | 
			
		||||
        # Extract video thumbnail
 | 
			
		||||
        video_thumbnail = config["video"]["thumbnail"]
 | 
			
		||||
        video_thumbnail = config["video"].get("thumbnail")
 | 
			
		||||
        if video_thumbnail is None:
 | 
			
		||||
            _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
 | 
			
		||||
 | 
			
		||||
        # Extract video description
 | 
			
		||||
        video_description = get_element_by_attribute("itemprop", "description", webpage)
 | 
			
		||||
        if video_description: video_description = clean_html(video_description)
 | 
			
		||||
        else: video_description = u''
 | 
			
		||||
        video_description = None
 | 
			
		||||
        try:
 | 
			
		||||
            video_description = get_element_by_attribute("itemprop", "description", webpage)
 | 
			
		||||
            if video_description: video_description = clean_html(video_description)
 | 
			
		||||
        except AssertionError as err:
 | 
			
		||||
            # On some pages like (http://player.vimeo.com/video/54469442) the
 | 
			
		||||
            # html tags are not closed, python 2.6 cannot handle it
 | 
			
		||||
            if err.args[0] == 'we should not get here!':
 | 
			
		||||
                pass
 | 
			
		||||
            else:
 | 
			
		||||
                raise
 | 
			
		||||
 | 
			
		||||
        # Extract upload date
 | 
			
		||||
        video_upload_date = None
 | 
			
		||||
@@ -154,14 +175,15 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
        # TODO bind to format param
 | 
			
		||||
        codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
 | 
			
		||||
        files = { 'hd': [], 'sd': [], 'other': []}
 | 
			
		||||
        config_files = config["video"].get("files") or config["request"].get("files")
 | 
			
		||||
        for codec_name, codec_extension in codecs:
 | 
			
		||||
            if codec_name in config["video"]["files"]:
 | 
			
		||||
                if 'hd' in config["video"]["files"][codec_name]:
 | 
			
		||||
            if codec_name in config_files:
 | 
			
		||||
                if 'hd' in config_files[codec_name]:
 | 
			
		||||
                    files['hd'].append((codec_name, codec_extension, 'hd'))
 | 
			
		||||
                elif 'sd' in config["video"]["files"][codec_name]:
 | 
			
		||||
                elif 'sd' in config_files[codec_name]:
 | 
			
		||||
                    files['sd'].append((codec_name, codec_extension, 'sd'))
 | 
			
		||||
                else:
 | 
			
		||||
                    files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
 | 
			
		||||
                    files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
 | 
			
		||||
 | 
			
		||||
        for quality in ('hd', 'sd', 'other'):
 | 
			
		||||
            if len(files[quality]) > 0:
 | 
			
		||||
@@ -173,8 +195,12 @@ class VimeoIE(InfoExtractor):
 | 
			
		||||
        else:
 | 
			
		||||
            raise ExtractorError(u'No known codec found')
 | 
			
		||||
 | 
			
		||||
        video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 | 
			
		||||
                    %(video_id, sig, timestamp, video_quality, video_codec.upper())
 | 
			
		||||
        video_url = None
 | 
			
		||||
        if isinstance(config_files[video_codec], dict):
 | 
			
		||||
            video_url = config_files[video_codec][video_quality].get("url")
 | 
			
		||||
        if video_url is None:
 | 
			
		||||
            video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 | 
			
		||||
                        %(video_id, sig, timestamp, video_quality, video_codec.upper())
 | 
			
		||||
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':       video_id,
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,6 @@ import re
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,8 @@ import re
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -36,15 +37,16 @@ class XHamsterIE(InfoExtractor):
 | 
			
		||||
            video_url = compat_urllib_parse.unquote(mobj.group('file'))
 | 
			
		||||
        else:
 | 
			
		||||
            video_url = mobj.group('server')+'/key='+mobj.group('file')
 | 
			
		||||
        video_extension = video_url.split('.')[-1]
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>',
 | 
			
		||||
            webpage, u'title')
 | 
			
		||||
 | 
			
		||||
        # Can't see the description anywhere in the UI
 | 
			
		||||
        # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
 | 
			
		||||
        #     webpage, u'description', fatal=False)
 | 
			
		||||
        # if video_description: video_description = unescapeHTML(video_description)
 | 
			
		||||
        # Only a few videos have a description
 | 
			
		||||
        mobj = re.search('<span>Description: </span>(?P<description>[^<]+)', webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
            video_description = unescapeHTML(mobj.group('description'))
 | 
			
		||||
        else:
 | 
			
		||||
            video_description = None
 | 
			
		||||
 | 
			
		||||
        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
 | 
			
		||||
        if mobj:
 | 
			
		||||
@@ -62,9 +64,9 @@ class XHamsterIE(InfoExtractor):
 | 
			
		||||
        return [{
 | 
			
		||||
            'id':       video_id,
 | 
			
		||||
            'url':      video_url,
 | 
			
		||||
            'ext':      video_extension,
 | 
			
		||||
            'ext':      determine_ext(video_url),
 | 
			
		||||
            'title':    video_title,
 | 
			
		||||
            # 'description': video_description,
 | 
			
		||||
            'description': video_description,
 | 
			
		||||
            'upload_date': video_upload_date,
 | 
			
		||||
            'uploader_id': video_uploader_id,
 | 
			
		||||
            'thumbnail': video_thumbnail
 | 
			
		||||
 
 | 
			
		||||
@@ -12,14 +12,16 @@ from ..utils import (
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from ..aes import (
 | 
			
		||||
    aes_decrypt_text
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
class YouPornIE(InfoExtractor):
 | 
			
		||||
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
 | 
			
		||||
        u'file': u'505835.mp4',
 | 
			
		||||
        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
 | 
			
		||||
        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"upload_date": u"20101221", 
 | 
			
		||||
            u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
 | 
			
		||||
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
 | 
			
		||||
        # Get all of the links from the page
 | 
			
		||||
        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
 | 
			
		||||
        links = re.findall(LINK_RE, download_list_html)
 | 
			
		||||
        if(len(links) == 0):
 | 
			
		||||
        
 | 
			
		||||
        # Get link of hd video if available
 | 
			
		||||
        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
 | 
			
		||||
        if mobj != None:
 | 
			
		||||
            encrypted_video_url = mobj.group(u'encrypted_video_url')
 | 
			
		||||
            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
 | 
			
		||||
            links = [video_url] + links
 | 
			
		||||
        
 | 
			
		||||
        if not links:
 | 
			
		||||
            raise ExtractorError(u'ERROR: no known formats available for video')
 | 
			
		||||
 | 
			
		||||
        self.to_screen(u'Links found: %d' % len(links))
 | 
			
		||||
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
 | 
			
		||||
            self._print_formats(formats)
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        req_format = self._downloader.params.get('format', None)
 | 
			
		||||
        req_format = self._downloader.params.get('format', 'best')
 | 
			
		||||
        self.to_screen(u'Format: %s' % req_format)
 | 
			
		||||
 | 
			
		||||
        if req_format is None or req_format == 'best':
 | 
			
		||||
 
 | 
			
		||||
@@ -194,7 +194,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
    _VALID_URL = r"""^
 | 
			
		||||
                     (
 | 
			
		||||
                         (?:https?://)?                                       # http(s):// (optional)
 | 
			
		||||
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 | 
			
		||||
                         (?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
 | 
			
		||||
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
 | 
			
		||||
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 | 
			
		||||
                         (?:                                                  # the various things that can precede the ID:
 | 
			
		||||
@@ -205,15 +205,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
 | 
			
		||||
                                 v=
 | 
			
		||||
                             )
 | 
			
		||||
                         )?                                                   # optional -> youtube.com/xxxx is OK
 | 
			
		||||
                         ))
 | 
			
		||||
                         |youtu\.be/                                          # just youtu.be/xxxx
 | 
			
		||||
                         )
 | 
			
		||||
                     )?                                                       # all until now is optional -> you can pass the naked ID
 | 
			
		||||
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
 | 
			
		||||
                     (?(1).+)?                                                # if we found the ID, everything can follow
 | 
			
		||||
                     $"""
 | 
			
		||||
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 | 
			
		||||
    # Listed in order of quality
 | 
			
		||||
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
 | 
			
		||||
                          '95', '94', '93', '92', '132', '151',
 | 
			
		||||
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
 | 
			
		||||
                          # Apple HTTP Live Streaming
 | 
			
		||||
                          '96', '95', '94', '93', '92', '132', '151',
 | 
			
		||||
                          # 3D
 | 
			
		||||
                          '85', '84', '102', '83', '101', '82', '100',
 | 
			
		||||
                          # Dash video
 | 
			
		||||
@@ -222,8 +225,10 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
                          # Dash audio
 | 
			
		||||
                          '141', '172', '140', '171', '139',
 | 
			
		||||
                          ]
 | 
			
		||||
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
 | 
			
		||||
                                      '95', '94', '93', '92', '132', '151',
 | 
			
		||||
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
 | 
			
		||||
                                      # Apple HTTP Live Streaming
 | 
			
		||||
                                      '96', '95', '94', '93', '92', '132', '151',
 | 
			
		||||
                                      # 3D
 | 
			
		||||
                                      '85', '102', '84', '101', '83', '100', '82',
 | 
			
		||||
                                      # Dash video
 | 
			
		||||
                                      '138', '248', '137', '247', '136', '246', '245',
 | 
			
		||||
@@ -231,11 +236,18 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
                                      # Dash audio
 | 
			
		||||
                                      '172', '141', '171', '140', '139',
 | 
			
		||||
                                      ]
 | 
			
		||||
    _video_formats_map = {
 | 
			
		||||
        'flv': ['35', '34', '6', '5'],
 | 
			
		||||
        '3gp': ['36', '17', '13'],
 | 
			
		||||
        'mp4': ['38', '37', '22', '18'],
 | 
			
		||||
        'webm': ['46', '45', '44', '43'],
 | 
			
		||||
    }
 | 
			
		||||
    _video_extensions = {
 | 
			
		||||
        '13': '3gp',
 | 
			
		||||
        '17': 'mp4',
 | 
			
		||||
        '17': '3gp',
 | 
			
		||||
        '18': 'mp4',
 | 
			
		||||
        '22': 'mp4',
 | 
			
		||||
        '36': '3gp',
 | 
			
		||||
        '37': 'mp4',
 | 
			
		||||
        '38': 'mp4',
 | 
			
		||||
        '43': 'webm',
 | 
			
		||||
@@ -252,7 +264,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
        '101': 'webm',
 | 
			
		||||
        '102': 'webm',
 | 
			
		||||
 | 
			
		||||
        # videos that use m3u8
 | 
			
		||||
        # Apple HTTP Live Streaming
 | 
			
		||||
        '92': 'mp4',
 | 
			
		||||
        '93': 'mp4',
 | 
			
		||||
        '94': 'mp4',
 | 
			
		||||
@@ -293,6 +305,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
        '22': '720x1280',
 | 
			
		||||
        '34': '360x640',
 | 
			
		||||
        '35': '480x854',
 | 
			
		||||
        '36': '240x320',
 | 
			
		||||
        '37': '1080x1920',
 | 
			
		||||
        '38': '3072x4096',
 | 
			
		||||
        '43': '360x640',
 | 
			
		||||
@@ -394,7 +407,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
            u"info_dict": {
 | 
			
		||||
                u"upload_date": u"20120506",
 | 
			
		||||
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
 | 
			
		||||
                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
 | 
			
		||||
                u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
 | 
			
		||||
                u"uploader": u"Icona Pop",
 | 
			
		||||
                u"uploader_id": u"IconaPop"
 | 
			
		||||
            }
 | 
			
		||||
@@ -432,7 +445,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def suitable(cls, url):
 | 
			
		||||
        """Receives a URL and returns True if suitable for this IE."""
 | 
			
		||||
        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
 | 
			
		||||
        if YoutubePlaylistIE.suitable(url): return False
 | 
			
		||||
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 | 
			
		||||
 | 
			
		||||
    def report_video_webpage_download(self, video_id):
 | 
			
		||||
@@ -465,15 +478,15 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
        elif len(s) == 89:
 | 
			
		||||
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
 | 
			
		||||
        elif len(s) == 88:
 | 
			
		||||
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
 | 
			
		||||
            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
 | 
			
		||||
        elif len(s) == 87:
 | 
			
		||||
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
 | 
			
		||||
        elif len(s) == 86:
 | 
			
		||||
            return s[5:20] + s[2] + s[21:]
 | 
			
		||||
            return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
 | 
			
		||||
        elif len(s) == 85:
 | 
			
		||||
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
 | 
			
		||||
        elif len(s) == 84:
 | 
			
		||||
            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
 | 
			
		||||
            return s[81:36:-1] + s[0] + s[35:2:-1]
 | 
			
		||||
        elif len(s) == 83:
 | 
			
		||||
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
 | 
			
		||||
        elif len(s) == 82:
 | 
			
		||||
@@ -537,13 +550,25 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
 | 
			
		||||
        else:
 | 
			
		||||
            # Specific formats. We pick the first in a slash-delimited sequence.
 | 
			
		||||
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
 | 
			
		||||
            # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
 | 
			
		||||
            # available in the specified format. For example,
 | 
			
		||||
            # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
 | 
			
		||||
            # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
 | 
			
		||||
            # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
 | 
			
		||||
            req_formats = req_format.split('/')
 | 
			
		||||
            video_url_list = None
 | 
			
		||||
            for rf in req_formats:
 | 
			
		||||
                if rf in url_map:
 | 
			
		||||
                    video_url_list = [(rf, url_map[rf])]
 | 
			
		||||
                    break
 | 
			
		||||
                if rf in self._video_formats_map:
 | 
			
		||||
                    for srf in self._video_formats_map[rf]:
 | 
			
		||||
                        if srf in url_map:
 | 
			
		||||
                            video_url_list = [(srf, url_map[srf])]
 | 
			
		||||
                            break
 | 
			
		||||
                    else:
 | 
			
		||||
                        continue
 | 
			
		||||
                    break
 | 
			
		||||
            if video_url_list is None:
 | 
			
		||||
                raise ExtractorError(u'requested format not available')
 | 
			
		||||
        return video_url_list
 | 
			
		||||
@@ -558,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
 | 
			
		||||
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
 | 
			
		||||
        formats_urls = _get_urls(manifest)
 | 
			
		||||
        for format_url in formats_urls:
 | 
			
		||||
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
 | 
			
		||||
            itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
 | 
			
		||||
            url_map[itag] = format_url
 | 
			
		||||
        return url_map
 | 
			
		||||
 | 
			
		||||
@@ -860,8 +885,11 @@ class YoutubePlaylistIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
            for entry in response['feed']['entry']:
 | 
			
		||||
                index = entry['yt$position']['$t']
 | 
			
		||||
                if 'media$group' in entry and 'media$player' in entry['media$group']:
 | 
			
		||||
                    videos.append((index, entry['media$group']['media$player']['url']))
 | 
			
		||||
                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
 | 
			
		||||
                    videos.append((
 | 
			
		||||
                        index,
 | 
			
		||||
                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
 | 
			
		||||
                    ))
 | 
			
		||||
 | 
			
		||||
        videos = [v[1] for v in sorted(videos)]
 | 
			
		||||
 | 
			
		||||
@@ -927,13 +955,20 @@ class YoutubeChannelIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
class YoutubeUserIE(InfoExtractor):
 | 
			
		||||
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
 | 
			
		||||
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
 | 
			
		||||
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
 | 
			
		||||
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
 | 
			
		||||
    _GDATA_PAGE_SIZE = 50
 | 
			
		||||
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
 | 
			
		||||
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
 | 
			
		||||
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
 | 
			
		||||
    IE_NAME = u'youtube:user'
 | 
			
		||||
 | 
			
		||||
    @classmethod
    def suitable(cls, url):
        """Return True if this extractor, and no other one in this module, accepts url.

        Every other module-level object whose name ends in 'IE' is asked
        first; if any of them reports the URL as suitable this method returns
        False. Otherwise the decision is delegated to the parent class.
        """
        # Don't return True if the url can be extracted with another youtube
        # extractor: this class's regex is too permissive and would match too.
        other_ies = (
            klass for (name, klass) in globals().items()
            if name.endswith('IE') and klass is not cls
        )
        if any(ie.suitable(url) for ie in other_ies):
            return False
        return super(YoutubeUserIE, cls).suitable(url)
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        # Extract username
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
@@ -956,13 +991,15 @@ class YoutubeUserIE(InfoExtractor):
 | 
			
		||||
            page = self._download_webpage(gdata_url, username,
 | 
			
		||||
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                response = json.loads(page)
 | 
			
		||||
            except ValueError as err:
 | 
			
		||||
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
 | 
			
		||||
 | 
			
		||||
            # Extract video identifiers
 | 
			
		||||
            ids_in_page = []
 | 
			
		||||
 | 
			
		||||
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
 | 
			
		||||
                if mobj.group(1) not in ids_in_page:
 | 
			
		||||
                    ids_in_page.append(mobj.group(1))
 | 
			
		||||
 | 
			
		||||
            for entry in response['feed']['entry']:
 | 
			
		||||
                ids_in_page.append(entry['id']['$t'].split('/')[-1])
 | 
			
		||||
            video_ids.extend(ids_in_page)
 | 
			
		||||
 | 
			
		||||
            # A little optimization - if current page is not
 | 
			
		||||
@@ -1101,7 +1138,7 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
 | 
			
		||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
 | 
			
		||||
    IE_NAME = u'youtube:favorites'
 | 
			
		||||
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
 | 
			
		||||
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
 | 
			
		||||
    _LOGIN_REQUIRED = True
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,19 +1,20 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import datetime
 | 
			
		||||
import email.utils
 | 
			
		||||
import errno
 | 
			
		||||
import gzip
 | 
			
		||||
import io
 | 
			
		||||
import json
 | 
			
		||||
import locale
 | 
			
		||||
import os
 | 
			
		||||
import platform
 | 
			
		||||
import re
 | 
			
		||||
import socket
 | 
			
		||||
import sys
 | 
			
		||||
import traceback
 | 
			
		||||
import zlib
 | 
			
		||||
import email.utils
 | 
			
		||||
import socket
 | 
			
		||||
import datetime
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    import urllib.request as compat_urllib_request
 | 
			
		||||
@@ -60,6 +61,11 @@ try:
 | 
			
		||||
except ImportError: # Python 2
 | 
			
		||||
    import httplib as compat_http_client
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from urllib.error import HTTPError as compat_HTTPError
 | 
			
		||||
except ImportError:  # Python 2
 | 
			
		||||
    from urllib2 import HTTPError as compat_HTTPError
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from subprocess import DEVNULL
 | 
			
		||||
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
 | 
			
		||||
@@ -207,7 +213,7 @@ if sys.version_info >= (2,7):
 | 
			
		||||
    def find_xpath_attr(node, xpath, key, val):
 | 
			
		||||
        """ Find the xpath xpath[@key=val] """
 | 
			
		||||
        assert re.match(r'^[a-zA-Z]+$', key)
 | 
			
		||||
        assert re.match(r'^[a-zA-Z@\s]*$', val)
 | 
			
		||||
        assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
 | 
			
		||||
        expr = xpath + u"[@%s='%s']" % (key, val)
 | 
			
		||||
        return node.find(expr)
 | 
			
		||||
else:
 | 
			
		||||
@@ -489,7 +495,7 @@ def make_HTTPS_handler(opts):
 | 
			
		||||
 | 
			
		||||
class ExtractorError(Exception):
 | 
			
		||||
    """Error during info extraction."""
 | 
			
		||||
    def __init__(self, msg, tb=None, expected=False):
 | 
			
		||||
    def __init__(self, msg, tb=None, expected=False, cause=None):
 | 
			
		||||
        """ tb, if given, is the original traceback (so that it can be printed out).
 | 
			
		||||
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 | 
			
		||||
        """
 | 
			
		||||
@@ -502,6 +508,7 @@ class ExtractorError(Exception):
 | 
			
		||||
 | 
			
		||||
        self.traceback = tb
 | 
			
		||||
        self.exc_info = sys.exc_info()  # preserve original exception
 | 
			
		||||
        self.cause = cause
 | 
			
		||||
 | 
			
		||||
    def format_traceback(self):
 | 
			
		||||
        if self.traceback is None:
 | 
			
		||||
@@ -622,8 +629,23 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 | 
			
		||||
        old_resp = resp
 | 
			
		||||
        # gzip
 | 
			
		||||
        if resp.headers.get('Content-encoding', '') == 'gzip':
 | 
			
		||||
            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
 | 
			
		||||
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 | 
			
		||||
            content = resp.read()
 | 
			
		||||
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 | 
			
		||||
            try:
 | 
			
		||||
                uncompressed = io.BytesIO(gz.read())
 | 
			
		||||
            except IOError as original_ioerror:
 | 
			
		||||
                # There may be junk at the end of the file
 | 
			
		||||
                # See http://stackoverflow.com/q/4928560/35070 for details
 | 
			
		||||
                for i in range(1, 1024):
 | 
			
		||||
                    try:
 | 
			
		||||
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 | 
			
		||||
                        uncompressed = io.BytesIO(gz.read())
 | 
			
		||||
                    except IOError:
 | 
			
		||||
                        continue
 | 
			
		||||
                    break
 | 
			
		||||
                else:
 | 
			
		||||
                    raise original_ioerror
 | 
			
		||||
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 | 
			
		||||
            resp.msg = old_resp.msg
 | 
			
		||||
        # deflate
 | 
			
		||||
        if resp.headers.get('Content-encoding', '') == 'deflate':
 | 
			
		||||
@@ -711,3 +733,31 @@ class DateRange(object):
 | 
			
		||||
        return self.start <= date <= self.end
 | 
			
		||||
    def __str__(self):
 | 
			
		||||
        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def platform_name():
    """Return the platform name as a compat_str.

    The value comes from platform.platform(). If that call returns a bytes
    object, it is decoded with the encoding reported by preferredencoding().
    """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    # Internal sanity check: callers rely on receiving a text string.
    assert isinstance(res, compat_str)
    return res
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values.

    An empty (or otherwise falsy) input yields an empty list. When indexing
    the input already produces ints, the elements are copied as-is;
    otherwise each element is converted with ord().
    """
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def intlist_to_bytes(xs):
    """Convert a list of integer byte values into a byte string.

    An empty (or otherwise falsy) input yields b''. Where chr() returns a
    byte string, the characters are joined; otherwise bytes() is built
    directly from the integers.
    """
    if not xs:
        return b''
    if isinstance(chr(0), bytes):  # Python 2
        return ''.join([chr(x) for x in xs])
    else:
        return bytes(xs)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,2 +1,2 @@
 | 
			
		||||
 | 
			
		||||
__version__ = '2013.08.22'
 | 
			
		||||
__version__ = '2013.09.06.1'
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user