mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 10:27:07 -05:00 
			
		
		
		
	Add webpage_url_basename info_dict field (Fixes #1938)
This commit is contained in:
		@@ -13,20 +13,21 @@ import xml.etree.ElementTree
 | 
			
		||||
 | 
			
		||||
#from youtube_dl.utils import htmlentity_transform
 | 
			
		||||
from youtube_dl.utils import (
 | 
			
		||||
    timeconvert,
 | 
			
		||||
    sanitize_filename,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    DateRange,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    find_xpath_attr,
 | 
			
		||||
    get_meta_content,
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    sanitize_filename,
 | 
			
		||||
    shell_quote,
 | 
			
		||||
    encodeFilename,
 | 
			
		||||
    smuggle_url,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
    timeconvert,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    unsmuggle_url,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    xpath_with_ns,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
if sys.version_info < (3, 0):
 | 
			
		||||
@@ -181,6 +182,12 @@ class TestUtil(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(str_to_int('123,456'), 123456)
 | 
			
		||||
        self.assertEqual(str_to_int('123.456'), 123456)
 | 
			
		||||
 | 
			
		||||
    def test_url_basename(self):
        # Each pair is (input URL, expected basename); the order mirrors the
        # cases from simplest (no path) to trailing-slash handling.
        expectations = (
            (u'http://foo.de/', u''),
            (u'http://foo.de/bar/baz', u'baz'),
            (u'http://foo.de/bar/baz?x=y', u'baz'),
            (u'http://foo.de/bar/baz#x=y', u'baz'),
            (u'http://foo.de/bar/baz/', u'baz'),
        )
        for page_url, wanted in expectations:
            self.assertEqual(url_basename(page_url), wanted)
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,7 @@ from .utils import (
 | 
			
		||||
    subtitles_filename,
 | 
			
		||||
    takewhile_inclusive,
 | 
			
		||||
    UnavailableVideoError,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    write_json_file,
 | 
			
		||||
    write_string,
 | 
			
		||||
    YoutubeDLHandler,
 | 
			
		||||
@@ -484,6 +485,7 @@ class YoutubeDL(object):
 | 
			
		||||
                    {
 | 
			
		||||
                        'extractor': ie.IE_NAME,
 | 
			
		||||
                        'webpage_url': url,
 | 
			
		||||
                        'webpage_url_basename': url_basename(url),
 | 
			
		||||
                        'extractor_key': ie.ie_key(),
 | 
			
		||||
                    })
 | 
			
		||||
                if process:
 | 
			
		||||
@@ -576,6 +578,7 @@ class YoutubeDL(object):
 | 
			
		||||
                    'playlist_index': i + playliststart,
 | 
			
		||||
                    'extractor': ie_result['extractor'],
 | 
			
		||||
                    'webpage_url': ie_result['webpage_url'],
 | 
			
		||||
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
 | 
			
		||||
                    'extractor_key': ie_result['extractor_key'],
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
@@ -596,6 +599,7 @@ class YoutubeDL(object):
 | 
			
		||||
                    {
 | 
			
		||||
                        'extractor': ie_result['extractor'],
 | 
			
		||||
                        'webpage_url': ie_result['webpage_url'],
 | 
			
		||||
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
 | 
			
		||||
                        'extractor_key': ie_result['extractor_key'],
 | 
			
		||||
                    })
 | 
			
		||||
                return r
 | 
			
		||||
 
 | 
			
		||||
@@ -1084,3 +1084,10 @@ def remove_start(s, start):
 | 
			
		||||
    if s.startswith(start):
 | 
			
		||||
        return s[len(start):]
 | 
			
		||||
    return s
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def url_basename(url):
    """Return the last non-empty path component of an http(s) URL.

    The query string, the fragment and a single trailing slash are ignored,
    so ``http://foo.de/bar/baz/?x=y`` yields ``baz``.  Scheme-relative URLs
    (``//host/path``) are accepted as well.

    Returns an empty unicode string when the URL has no path component or
    does not look like an http(s) URL at all.
    """
    # ``(?:[^/?#]+/)*`` skips any number of leading directory components;
    # allowing only one made URLs with deeper paths (e.g.
    # http://host/a/b/c.mp4) fall through to the empty-string result.
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user