mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	[YoutubeDL] format spec: correctly handle dashes and other unused operators
'mp4-baseline-16x9' must be handled as a single string, but the '-' was treated as an operator.
This commit is contained in:
		@@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase):
 | 
			
		||||
    def test_format_selection(self):
 | 
			
		||||
        formats = [
 | 
			
		||||
            {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
 | 
			
		||||
            {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
 | 
			
		||||
            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
 | 
			
		||||
            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
 | 
			
		||||
            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
 | 
			
		||||
@@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase):
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], '35')
 | 
			
		||||
 | 
			
		||||
        ydl = YDL({'format': 'example-with-dashes'})
 | 
			
		||||
        ydl.process_ie_result(info_dict.copy())
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'example-with-dashes')
 | 
			
		||||
 | 
			
		||||
    def test_format_selection_audio(self):
 | 
			
		||||
        formats = [
 | 
			
		||||
            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
 | 
			
		||||
 
 | 
			
		||||
@@ -933,6 +933,37 @@ class YoutubeDL(object):
 | 
			
		||||
                else:
 | 
			
		||||
                    filter_parts.append(string)
 | 
			
		||||
 | 
			
		||||
        def _remove_unused_ops(tokens):
            """Merge operators the format grammar does not use into adjacent names.

            Yields 5-tuples ``(type, string, start, end, line)`` like the ones
            consumed from *tokens*.  Runs of NAME, NUMBER and non-allowed OP
            tokens are concatenated and emitted as one NAME token, for
            example: 'mp4' '-' 'baseline' '-' '16x9' is converted to
            'mp4-baseline-16x9'.  Operators in ALLOWED_OPS are passed through
            unchanged, and everything from '[' up to and including the next
            ']' is forwarded untouched.  Tokens of any other type are dropped.
            """
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # The bracket branch below resumes consumption of the same stream
            # from a nested loop; forcing an iterator keeps that correct even
            # when the caller passes a list instead of a generator.
            tokens = iter(tokens)
            last_string, last_start, last_end, last_line = None, None, None, None
            for tok_type, string, start, end, line in tokens:
                if tok_type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield tok_type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for tok_type, string, start, end, line in tokens:
                        yield tok_type, string, start, end, line
                        if tok_type == tokenize.OP and string == ']':
                            break
                elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield tok_type, string, start, end, line
                elif tok_type in (tokenize.NAME, tokenize.NUMBER, tokenize.OP):
                    if not last_string:
                        # First fragment of a new merged name: remember where
                        # it starts and which source line it belongs to.
                        last_string = string
                        last_start = start
                        last_line = line
                    else:
                        last_string += string
                    # The merged token ends where its latest fragment ends.
                    last_end = end
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
 | 
			
		||||
 | 
			
		||||
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 | 
			
		||||
            selectors = []
 | 
			
		||||
            current_selector = None
 | 
			
		||||
@@ -1111,7 +1142,7 @@ class YoutubeDL(object):
 | 
			
		||||
 | 
			
		||||
        stream = io.BytesIO(format_spec.encode('utf-8'))
 | 
			
		||||
        try:
 | 
			
		||||
            tokens = list(compat_tokenize_tokenize(stream.readline))
 | 
			
		||||
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
 | 
			
		||||
        except tokenize.TokenError:
 | 
			
		||||
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user