1
0
mirror of https://gitlab.com/ytdl-org/youtube-dl.git synced 2026-04-25 00:00:04 -04:00

Compare commits

..

17 Commits

Author SHA1 Message Date
Philipp Hagemeister f1ed3acae5 release 2016.02.04 2016-02-04 13:39:26 +01:00
remitamine 920d21b9d3 [test_subtitles] update youtube subtitles tests 2016-02-04 08:50:55 +01:00
remitamine 2fb35d1c28 [youtube] fix subtitle order 2016-02-04 08:39:01 +01:00
remitamine 09be85b8dd [youtube] fix subtitle extraction(fixes #8415) 2016-02-04 08:28:37 +01:00
remitamine eadc3ccd50 [generic] extract m3u8 formats when mpegurl content type detected 2016-02-04 01:25:36 +01:00
Yen Chi Hsuan 58be922079 [kuwo] Check for georestriction 2016-02-04 01:26:25 +08:00
Sergey M c84d3a557d [README.md] Clarify unavailable sequences in output format 2016-02-03 19:18:25 +05:00
remitamine 6ad2b01e14 [srgssr] use flv as ext for rtmp formats 2016-02-02 23:09:50 +01:00
remitamine fd3a1f3d60 [cbsnews] add support for live videos(fixes #7010) 2016-02-02 23:02:18 +01:00
Jaime Marquínez Ferrándiz 87de7069b9 [utils] dfxp2srt: make TTMLPElementParser inherit from object
For consistency between python 2 and 3.
2016-02-02 22:30:13 +01:00
remitamine 6fba62c87a [ffmpeg] fix adding metadata when using --hls-prefer-native(#8350) 2016-02-02 22:14:23 +01:00
Yen Chi Hsuan 1df4141196 [test_YoutubeDL] Fix test_youtube_format_selection
Broken since a6c2c24479. Thanks to
@jaimeMF and @anisse for pointing that out
2016-02-03 03:42:37 +08:00
remitamine fae45ede08 Merge pull request #8354 from remitamine/m3u8_metadata
[ffmpeg] fix adding metadata when using m3u8_native(fixes #8350)
2016-02-02 19:13:58 +01:00
remitamine 4e0cff2a50 Merge pull request #8348 from remitamine/dfxp2srt-text
[utils] fix dfxp2srt text extraction(fixes #8055)
2016-02-02 18:36:26 +01:00
Sergey M․ 0436157b95 [vk:uservideos] Improve _VALID_URL (Closes #8389) 2016-02-02 00:52:37 +06:00
remitamine cf57433bbd [ffmpeg] fix adding metadata when using m3u8_native(fixes #8350) 2016-01-28 18:57:32 +01:00
remitamine 2b14cb566f [utils] fix dfxp2srt text extraction(fixes #8055) 2016-01-28 12:38:34 +01:00
14 changed files with 114 additions and 41 deletions
+2
View File
@@ -455,6 +455,8 @@ The `-o` option allows users to indicate a template for the output file names. T
- `format_id`: The sequence will be replaced by the format code specified by `--format`.
- `duration`: The sequence will be replaced by the length of the video in seconds.
Note that some of the aforementioned sequences are not guaranteed to be present since they depend on the metadata obtained by particular extractor, such sequences will be replaced with `NA`.
The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
+1
View File
@@ -91,6 +91,7 @@
- **Canvas**
- **CBS**
- **CBSNews**: CBS News
- **CBSNewsLiveVideo**: CBS News Live Videos
- **CBSSports**
- **CeskaTelevize**
- **channel9**: Channel 9
+11
View File
@@ -248,6 +248,17 @@ class TestFormatSelection(unittest.TestCase):
def format_info(f_id):
info = YoutubeIE._formats[f_id].copy()
# XXX: In real cases InfoExtractor._parse_mpd() fills up 'acodec'
# and 'vcodec', while in tests such information is incomplete since
# commit a6c2c24479e5f4827ceb06f64d855329c0a6f593
# test_YoutubeDL.test_youtube_format_selection is broken without
# this fix
if 'acodec' in info and 'vcodec' not in info:
info['vcodec'] = 'none'
elif 'vcodec' in info and 'acodec' not in info:
info['acodec'] = 'none'
info['format_id'] = f_id
info['url'] = 'url:' + f_id
return info
+6 -6
View File
@@ -65,16 +65,16 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
for lang in ['it', 'fr', 'de']:
self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06')
self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
def test_youtube_subtitles_sbv_format(self):
def test_youtube_subtitles_ttml_format(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitlesformat'] = 'sbv'
self.DL.params['subtitlesformat'] = 'ttml'
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54')
def test_youtube_subtitles_vtt_format(self):
self.DL.params['writesubtitles'] = True
+4 -1
View File
@@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import CanvasIE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbsnews import (
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
+44 -8
View File
@@ -1,15 +1,14 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from ..utils import parse_duration
class CBSNewsIE(ThePlatformIE):
IE_DESC = 'CBS News'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
_TESTS = [
{
@@ -48,14 +47,13 @@ class CBSNewsIE(ThePlatformIE):
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_info = json.loads(self._html_search_regex(
video_info = self._parse_json(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
webpage, 'video JSON info'))
webpage, 'video JSON info'), video_id)
item = video_info['item'] if 'item' in video_info else video_info
title = item.get('articleTitle') or item.get('hed')
@@ -88,3 +86,41 @@ class CBSNewsIE(ThePlatformIE):
'formats': formats,
'subtitles': subtitles,
}
class CBSNewsLiveVideoIE(InfoExtractor):
IE_DESC = 'CBS News Live Videos'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
_TEST = {
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
'info_dict': {
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
'ext': 'flv',
'title': 'Clinton, Sanders Prepare To Face Off In NH',
'duration': 334,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(self._html_search_regex(
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
hdcore_sign = 'hdcore=3.3.1'
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
if f4m_formats:
for entry in f4m_formats:
# URLs without the extra param induce an 404 error
entry.update({'extra_param_to_segment_url': hdcore_sign})
return {
'id': video_id,
'title': video_info['headline'],
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
'duration': parse_duration(video_info.get('segmentDur')),
'formats': f4m_formats,
}
+11 -6
View File
@@ -1229,19 +1229,24 @@ class GenericIE(InfoExtractor):
# Check for direct link to a video
content_type = head_response.headers.get('Content-Type', '')
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
if m:
upload_date = unified_strdate(
head_response.headers.get('Last-Modified'))
formats = []
if m.group('format_id').endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
else:
formats = [{
'format_id': m.group('format_id'),
'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
return {
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True,
'formats': [{
'format_id': m.group('format_id'),
'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None
}],
'formats': formats,
'upload_date': upload_date,
}
+4
View File
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
(file_format['ext'], file_format.get('br', ''), song_id),
song_id, note='Download %s url info' % file_format['format'],
)
if song_url == 'IPDeny':
raise ExtractorError('This song is blocked in this region', expected=True)
if song_url.startswith('http://') or song_url.startswith('https://'):
formats.append({
'url': song_url,
+1 -4
View File
@@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
asset_url, media_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
else:
ext = None
if protocol == 'RTMP':
ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
formats.append({
'format_id': format_id,
'url': asset_url,
'preference': preference(quality),
'ext': ext,
'ext': 'flv' if protocol == 'RTMP' else None,
})
self._sort_formats(formats)
+4 -1
View File
@@ -321,7 +321,7 @@ class VKIE(InfoExtractor):
class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
_VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{
'url': 'http://vk.com/videos205387401',
@@ -333,6 +333,9 @@ class VKUserVideosIE(InfoExtractor):
}, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
}, {
'url': 'http://vk.com/videos-97664626?section=all',
'only_matching': True,
}]
def _real_extract(self, url):
+1 -1
View File
@@ -918,7 +918,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if lang in sub_lang_list:
continue
sub_formats = []
for ext in ['sbv', 'vtt', 'srt']:
for ext in ['ttml', 'vtt']:
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
+4
View File
@@ -391,6 +391,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
# https://github.com/rg3/youtube-dl/issues/8350
if info['protocol'] == 'm3u8_native' or self._downloader.params.get('hls_prefer_native', False):
options.extend(['-bsf:a', 'aac_adtstoasc'])
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
+20 -13
View File
@@ -2017,20 +2017,27 @@ def dfxp2srt(dfxp_data):
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
})
class TTMLPElementParser(object):
out = ''
def start(self, tag, attrib):
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
self.out += '\n'
def end(self, tag):
pass
def data(self, data):
self.out += data
def close(self):
return self.out.strip()
def parse_node(node):
str_or_empty = functools.partial(str_or_none, default='')
out = str_or_empty(node.text)
for child in node:
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
out += '\n' + str_or_empty(child.tail)
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
return out
target = TTMLPElementParser()
parser = xml.etree.ElementTree.XMLParser(target=target)
parser.feed(xml.etree.ElementTree.tostring(node))
return parser.close()
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
out = []
+1 -1
View File
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2016.02.01'
__version__ = '2016.02.04'