mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-03 20:57:07 -05:00 
			
		
		
		
	Ignore BOM in batch files (Fixes #2450)
This commit is contained in:
		@@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Various small unit tests
 | 
			
		||||
import io
 | 
			
		||||
import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
#from youtube_dl.utils import htmlentity_transform
 | 
			
		||||
@@ -21,6 +22,7 @@ from youtube_dl.utils import (
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    PagedList,
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    read_batch_urls,
 | 
			
		||||
    sanitize_filename,
 | 
			
		||||
    shell_quote,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
@@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
    def test_struct_unpack(self):
        # Unpacking a single zero byte with the u'!B' format must give (0,).
        unpacked = struct_unpack(u'!B', b'\x00')
        self.assertEqual(unpacked, (0,))
 | 
			
		||||
 | 
			
		||||
    def test_read_batch_urls(self):
        # The fixture starts with the three-character prefix \xef\xbb\xbf,
        # mixes \r\n and \n line endings, and contains one '#' line and
        # one ';' line; only the four plain entries are expected back.
        batch_text = u'''\xef\xbb\xbf foo
            bar\r
            baz
            # More after this line\r
            ; or after this
            bam'''
        expected_urls = [u'foo', u'bar', u'baz', u'bam']

        batch_file = io.StringIO(batch_text)
        self.assertEqual(read_batch_urls(batch_file), expected_urls)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -71,6 +71,7 @@ from .utils import (
 | 
			
		||||
    get_cachedir,
 | 
			
		||||
    MaxDownloadsReached,
 | 
			
		||||
    preferredencoding,
 | 
			
		||||
    read_batch_urls,
 | 
			
		||||
    SameFileError,
 | 
			
		||||
    setproctitle,
 | 
			
		||||
    std_headers,
 | 
			
		||||
@@ -552,21 +553,19 @@ def _real_main(argv=None):
 | 
			
		||||
        sys.exit(0)
 | 
			
		||||
 | 
			
		||||
    # Batch file verification
 | 
			
		||||
    batchurls = []
 | 
			
		||||
    batch_urls = []
 | 
			
		||||
    if opts.batchfile is not None:
 | 
			
		||||
        try:
 | 
			
		||||
            if opts.batchfile == '-':
 | 
			
		||||
                batchfd = sys.stdin
 | 
			
		||||
            else:
 | 
			
		||||
                batchfd = open(opts.batchfile, 'r')
 | 
			
		||||
            batchurls = batchfd.readlines()
 | 
			
		||||
            batchurls = [x.strip() for x in batchurls]
 | 
			
		||||
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
 | 
			
		||||
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
 | 
			
		||||
            batch_urls = read_batch_urls(batchfd)
 | 
			
		||||
            if opts.verbose:
 | 
			
		||||
                write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
 | 
			
		||||
                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
 | 
			
		||||
        except IOError:
 | 
			
		||||
            sys.exit(u'ERROR: batch file could not be read')
 | 
			
		||||
    all_urls = batchurls + args
 | 
			
		||||
    all_urls = batch_urls + args
 | 
			
		||||
    all_urls = [url.strip() for url in all_urls]
 | 
			
		||||
    _enc = preferredencoding()
 | 
			
		||||
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
#!/usr/bin/env python
 | 
			
		||||
# -*- coding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import contextlib
 | 
			
		||||
import ctypes
 | 
			
		||||
import datetime
 | 
			
		||||
import email.utils
 | 
			
		||||
@@ -1245,3 +1246,19 @@ except TypeError:
 | 
			
		||||
else:
 | 
			
		||||
    struct_pack = struct.pack
 | 
			
		||||
    struct_unpack = struct.unpack
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file object, then close it.

    Every line is decoded to text if it is a byte string, has a leading
    byte order mark removed and is stripped of surrounding whitespace.
    Empty lines and lines starting with '#', ';' or ']' are skipped.

    batch_fd -- iterable of lines (text or bytes) that has a close() method
    Returns the list of remaining URLs, in input order.
    """
    # A UTF-8 byte order mark shows up as U+FEFF when the input was decoded
    # as UTF-8, and as the three characters u'\xef\xbb\xbf' when the raw
    # bytes were decoded one-to-one (latin-1). str.strip() removes neither,
    # so both forms are handled explicitly.
    boms = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        # Returns the cleaned URL, or a falsy value if the line holds none.
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in boms:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    # closing() guarantees batch_fd.close() is called even if reading fails.
    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user