mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 08:27:07 -05:00 
			
		
		
		
	[huffpost] Add support
This commit is contained in:
		@@ -1,3 +1,5 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .common import FileDownloader
 | 
			
		||||
from .hls import HlsFD
 | 
			
		||||
from .http import HttpFD
 | 
			
		||||
@@ -12,10 +14,11 @@ from ..utils import (
 | 
			
		||||
def get_suitable_downloader(info_dict):
 | 
			
		||||
    """Get the downloader class that can handle the info dict."""
 | 
			
		||||
    url = info_dict['url']
 | 
			
		||||
    protocol = info_dict.get('protocol')
 | 
			
		||||
 | 
			
		||||
    if url.startswith('rtmp'):
 | 
			
		||||
        return RtmpFD
 | 
			
		||||
    if determine_ext(url) == u'm3u8':
 | 
			
		||||
    if (protocol == 'm3u8') or (protocol is None and determine_ext(url) == 'm3u8'):
 | 
			
		||||
        return HlsFD
 | 
			
		||||
    if url.startswith('mms') or url.startswith('rtsp'):
 | 
			
		||||
        return MplayerFD
 | 
			
		||||
 
 | 
			
		||||
@@ -83,6 +83,7 @@ from .googlesearch import GoogleSearchIE
 | 
			
		||||
from .hark import HarkIE
 | 
			
		||||
from .hotnewhiphop import HotNewHipHopIE
 | 
			
		||||
from .howcast import HowcastIE
 | 
			
		||||
from .huffpost import HuffPostIE
 | 
			
		||||
from .hypem import HypemIE
 | 
			
		||||
from .ign import IGNIE, OneUPIE
 | 
			
		||||
from .imdb import (
 | 
			
		||||
 
 | 
			
		||||
@@ -71,7 +71,7 @@ class InfoExtractor(object):
 | 
			
		||||
                    * player_url SWF Player URL (used for rtmpdump).
 | 
			
		||||
                    * protocol   The protocol that will be used for the actual
 | 
			
		||||
                                 download, lower-case.
 | 
			
		||||
                                 "http", "https", "rtsp", "rtmp" or so.
 | 
			
		||||
                                 "http", "https", "rtsp", "rtmp", "m3u8" or so.
 | 
			
		||||
                    * preference Order number of this format. If this field is
 | 
			
		||||
                                 present and not None, the formats get sorted
 | 
			
		||||
                                 by this field.
 | 
			
		||||
 
 | 
			
		||||
@@ -332,10 +332,16 @@ class GenericIE(InfoExtractor):
 | 
			
		||||
 | 
			
		||||
        # Look for embedded Facebook player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>https://www.facebook.com/video/embed.+?)\1', webpage)
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'Facebook')
 | 
			
		||||
 | 
			
		||||
        # Look for embedded Huffington Post player
 | 
			
		||||
        mobj = re.search(
 | 
			
		||||
            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
 | 
			
		||||
        if mobj is not None:
 | 
			
		||||
            return self.url_result(mobj.group('url'), 'HuffPost')
 | 
			
		||||
 | 
			
		||||
        # Start with something easy: JW Player in SWFObject
 | 
			
		||||
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
 | 
			
		||||
        if mobj is None:
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										70
									
								
								youtube_dl/extractor/huffpost.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								youtube_dl/extractor/huffpost.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,70 @@
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    parse_duration,
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class HuffPostIE(InfoExtractor):
    """Information extractor for Huffington Post Live segments.

    Handles both the regular segment pages
    (``live.huffingtonpost.com/r/segment/<slug>/<id>``) and the embed player
    URLs (``embed.live.huffingtonpost.com/HPLEmbedPlayer/?segmentId=<id>``).
    The segment metadata is fetched from the JSON segments API.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): 'md5', 'title' and 'description' are still placeholders
    # and must be filled in with real values before this test can pass.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by ``url``.

        Downloads the segment's JSON description and derives the title,
        duration, upload date, thumbnails and formats from it.  A missing
        'title', 'running_time', 'schedule', 'images' or 'sources' entry
        raises KeyError, as those fields are treated as mandatory.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['started_at'])
        # Optional field; absent descriptions simply yield None.
        description = data.get('description')

        thumbnails = []
        # Use a dedicated loop variable so the ``url`` parameter is not
        # clobbered, and skip empty entries that re.match() cannot handle.
        for image_url in data['images'].values():
            if not image_url:
                continue
            # Only keep images whose file name encodes a WIDTHxHEIGHT size.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': format_url,
            # Keys starting with 'audio/' denote audio-only streams.
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, format_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }
 | 
			
		||||
		Reference in New Issue
	
	Block a user