mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 00:37:06 -05:00 
			
		
		
		
	[f4m] Prefer baseURL for relative URLs (closes #14660)
This commit is contained in:
		@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
 | 
			
		||||
                       media))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _add_ns(prop):
 | 
			
		||||
    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
 | 
			
		||||
def _add_ns(prop, ver=1):
 | 
			
		||||
    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_base_url(manifest):
    """Look up the ``baseURL`` element of an F4M manifest.

    The element is searched under both the 1.0 and 2.0 F4M namespaces.
    A non-empty value is returned with surrounding whitespace stripped;
    otherwise the lookup result is returned unchanged (presumably ``None``
    when the element is missing, given ``default=None`` -- confirm against
    ``xpath_text``).
    """
    base_url = xpath_text(
        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
        'base URL', default=None)
    if base_url:
        base_url = base_url.strip()
    return base_url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class F4mFD(FragmentFD):
 | 
			
		||||
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
 | 
			
		||||
            rate, media = list(filter(
 | 
			
		||||
                lambda f: int(f[0]) == requested_bitrate, formats))[0]
 | 
			
		||||
 | 
			
		||||
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
 | 
			
		||||
        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
 | 
			
		||||
        man_base_url = get_base_url(doc) or man_url
 | 
			
		||||
 | 
			
		||||
        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
 | 
			
		||||
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
 | 
			
		||||
        # From Adobe F4M 3.0 spec:
 | 
			
		||||
        # The <baseURL> element SHALL be the base URL for all relative
 | 
			
		||||
        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
 | 
			
		||||
        # URLs should be relative to the location of the containing document.
 | 
			
		||||
        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
 | 
			
		||||
        boot_info, bootstrap_url = self._parse_bootstrap_node(
 | 
			
		||||
            bootstrap_node, man_base_url)
 | 
			
		||||
        live = boot_info['live']
 | 
			
		||||
        metadata_node = media.find(_add_ns('metadata'))
 | 
			
		||||
        if metadata_node is not None:
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,10 @@ from ..compat import (
 | 
			
		||||
    compat_urlparse,
 | 
			
		||||
    compat_xml_parse_error,
 | 
			
		||||
)
 | 
			
		||||
from ..downloader.f4m import remove_encrypted_media
 | 
			
		||||
from ..downloader.f4m import (
 | 
			
		||||
    get_base_url,
 | 
			
		||||
    remove_encrypted_media,
 | 
			
		||||
)
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    NO_DEFAULT,
 | 
			
		||||
    age_restricted,
 | 
			
		||||
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
 | 
			
		||||
        media_nodes = remove_encrypted_media(media_nodes)
 | 
			
		||||
        if not media_nodes:
 | 
			
		||||
            return formats
 | 
			
		||||
        base_url = xpath_text(
 | 
			
		||||
            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
 | 
			
		||||
            'base URL', default=None)
 | 
			
		||||
        if base_url:
 | 
			
		||||
            base_url = base_url.strip()
 | 
			
		||||
 | 
			
		||||
        manifest_base_url = get_base_url(manifest)
 | 
			
		||||
 | 
			
		||||
        bootstrap_info = xpath_element(
 | 
			
		||||
            manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
 | 
			
		||||
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
 | 
			
		||||
                    continue
 | 
			
		||||
                manifest_url = (
 | 
			
		||||
                    media_url if media_url.startswith('http://') or media_url.startswith('https://')
 | 
			
		||||
                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
 | 
			
		||||
                    else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
 | 
			
		||||
                # If media_url is itself a f4m manifest do the recursive extraction
 | 
			
		||||
                # since bitrates in parent manifest (this one) and media_url manifest
 | 
			
		||||
                # may differ leading to inability to resolve the format by requested
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user