mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:27:06 -05:00 
			
		
		
		
	[utils] Add a function to sanitize consecutive slashes in URLs
This commit is contained in:
		@@ -54,6 +54,7 @@ from youtube_dl.utils import (
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
    render_table,
 | 
			
		||||
    match_str,
 | 
			
		||||
    url_sanitize_consecutive_slashes,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -501,6 +502,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
 | 
			
		||||
            'like_count > 100 & dislike_count <? 50 & description',
 | 
			
		||||
            {'like_count': 190, 'dislike_count': 10}))
 | 
			
		||||
 | 
			
		||||
    def test_url_sanitize_consecutive_slashes(self):
        """Check that runs of slashes in the URL path collapse to one slash."""
        # (input URL, expected result) pairs. The last two inputs are already
        # clean and are expected to come back unchanged.
        cases = (
            ('http://hostname/foo//bar/filename.html',
             'http://hostname/foo/bar/filename.html'),
            ('http://hostname//foo/bar/filename.html',
             'http://hostname/foo/bar/filename.html'),
            ('http://hostname//', 'http://hostname/'),
            ('http://hostname/foo/bar/filename.html',
             'http://hostname/foo/bar/filename.html'),
            ('http://hostname/', 'http://hostname/'),
        )
        for raw_url, expected_url in cases:
            self.assertEqual(
                url_sanitize_consecutive_slashes(raw_url), expected_url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,7 @@ from ..compat import (
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_urllib_request
 | 
			
		||||
)
 | 
			
		||||
from ..utils import url_sanitize_consecutive_slashes
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SohuIE(InfoExtractor):
 | 
			
		||||
@@ -105,11 +106,8 @@ class SohuIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
                part_info = part_str.split('|')
 | 
			
		||||
 | 
			
		||||
                # Sanitize URL to prevent download failure
 | 
			
		||||
                if part_info[0][-1] == '/' and su[i][0] == '/':
 | 
			
		||||
                    su[i] = su[i][1:]
 | 
			
		||||
 | 
			
		||||
                video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
 | 
			
		||||
                video_url = url_sanitize_consecutive_slashes(
 | 
			
		||||
                    '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
 | 
			
		||||
 | 
			
		||||
                formats.append({
 | 
			
		||||
                    'url': video_url,
 | 
			
		||||
 
 | 
			
		||||
@@ -1789,3 +1789,18 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 | 
			
		||||
            return None  # No Proxy
 | 
			
		||||
        return compat_urllib_request.ProxyHandler.proxy_open(
 | 
			
		||||
            self, req, proxy, type)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def url_sanitize_consecutive_slashes(url):
    """Collapse runs of consecutive slashes in the path of a URL

    Only the path component is rewritten; the scheme, host, parameters,
    query string and fragment are passed through as parsed. Both

        http://hostname/foo//bar/filename.html

    and

        http://hostname//foo/bar/filename.html

    are therefore turned into

        http://hostname/foo/bar/filename.html
    """
    scheme, netloc, path, params, query, fragment = (
        compat_urlparse.urlparse(url))
    # Two or more adjacent slashes become exactly one.
    path = re.sub(r'/{2,}', '/', path)
    return compat_urlparse.urlunparse(
        (scheme, netloc, path, params, query, fragment))
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user