mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:47:07 -05:00 
			
		
		
		
	[heise] Fix description, thumbnail and format ID
This commit is contained in:
		@@ -404,7 +404,7 @@ class InfoExtractor(object):
 | 
			
		||||
            video_info['title'] = playlist_title
 | 
			
		||||
        return video_info
 | 
			
		||||
 | 
			
		||||
    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
 | 
			
		||||
    def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
 | 
			
		||||
        """
 | 
			
		||||
        Perform a regex search on the given string, using a single or a list of
 | 
			
		||||
        patterns returning the first matching group.
 | 
			
		||||
@@ -425,8 +425,11 @@ class InfoExtractor(object):
 | 
			
		||||
            _name = name
 | 
			
		||||
 | 
			
		||||
        if mobj:
 | 
			
		||||
            # return the first matching group
 | 
			
		||||
            return next(g for g in mobj.groups() if g is not None)
 | 
			
		||||
            if group is None:
 | 
			
		||||
                # return the first matching group
 | 
			
		||||
                return next(g for g in mobj.groups() if g is not None)
 | 
			
		||||
            else:
 | 
			
		||||
                return mobj.group(group)
 | 
			
		||||
        elif default is not _NO_DEFAULT:
 | 
			
		||||
            return default
 | 
			
		||||
        elif fatal:
 | 
			
		||||
@@ -436,11 +439,11 @@ class InfoExtractor(object):
 | 
			
		||||
                'please report this issue on http://yt-dl.org/bug' % _name)
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0):
 | 
			
		||||
    def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
 | 
			
		||||
        """
 | 
			
		||||
        Like _search_regex, but strips HTML tags and unescapes entities.
 | 
			
		||||
        """
 | 
			
		||||
        res = self._search_regex(pattern, string, name, default, fatal, flags)
 | 
			
		||||
        res = self._search_regex(pattern, string, name, default, fatal, flags, group)
 | 
			
		||||
        if res:
 | 
			
		||||
            return clean_html(res).strip()
 | 
			
		||||
        else:
 | 
			
		||||
@@ -534,9 +537,9 @@ class InfoExtractor(object):
 | 
			
		||||
            display_name = name
 | 
			
		||||
        return self._html_search_regex(
 | 
			
		||||
            r'''(?ix)<meta
 | 
			
		||||
                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
 | 
			
		||||
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
 | 
			
		||||
            html, display_name, fatal=fatal, **kwargs)
 | 
			
		||||
                    (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
 | 
			
		||||
                    [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
 | 
			
		||||
            html, display_name, fatal=fatal, group='content', **kwargs)
 | 
			
		||||
 | 
			
		||||
    def _dc_search_uploader(self, html):
 | 
			
		||||
        return self._html_search_meta('dc.creator', html, 'uploader')
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    get_meta_content,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
)
 | 
			
		||||
@@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor):
 | 
			
		||||
            'title': (
 | 
			
		||||
                "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone"
 | 
			
		||||
            ),
 | 
			
		||||
            'format_id': 'mp4_720',
 | 
			
		||||
            'format_id': 'mp4_720p',
 | 
			
		||||
            'timestamp': 1411812600,
 | 
			
		||||
            'upload_date': '20140927',
 | 
			
		||||
            'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.',
 | 
			
		||||
            'thumbnail': 're:https?://.*\.jpg$',
 | 
			
		||||
            'thumbnail': 're:^https?://.*\.jpe?g$',
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor):
 | 
			
		||||
        info = {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
            'timestamp': parse_iso8601(get_meta_content('date', webpage)),
 | 
			
		||||
            'timestamp': parse_iso8601(
 | 
			
		||||
                self._html_search_meta('date', webpage)),
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        title = get_meta_content('fulltitle', webpage)
 | 
			
		||||
        title = self._html_search_meta('fulltitle', webpage)
 | 
			
		||||
        if title:
 | 
			
		||||
            info['title'] = title
 | 
			
		||||
        else:
 | 
			
		||||
@@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor):
 | 
			
		||||
            label = source_node.attrib['label']
 | 
			
		||||
            height = int_or_none(self._search_regex(
 | 
			
		||||
                r'^(.*?_)?([0-9]+)p$', label, 'height', default=None))
 | 
			
		||||
            video_url = source_node.attrib['file']
 | 
			
		||||
            ext = determine_ext(video_url, '')
 | 
			
		||||
            formats.append({
 | 
			
		||||
                'url': source_node.attrib['file'],
 | 
			
		||||
                'url': video_url,
 | 
			
		||||
                'format_note': label,
 | 
			
		||||
                'format_id': '%s_%s' % (ext, label),
 | 
			
		||||
                'height': height,
 | 
			
		||||
            })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user