mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 09:17:07 -05:00 
			
		
		
		
	[crunchyroll] parse vilos media data(closes #17343)
This commit is contained in:
		@@ -7,7 +7,7 @@ import zlib
 | 
			
		||||
 | 
			
		||||
from hashlib import sha1
 | 
			
		||||
from math import pow, sqrt, floor
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from .vrv import VRVIE
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_b64decode,
 | 
			
		||||
    compat_etree_fromstring,
 | 
			
		||||
@@ -18,6 +18,8 @@ from ..compat import (
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    bytes_to_intlist,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    intlist_to_bytes,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    lowercase_escape,
 | 
			
		||||
@@ -26,14 +28,13 @@ from ..utils import (
 | 
			
		||||
    unified_strdate,
 | 
			
		||||
    urlencode_postdata,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
    extract_attributes,
 | 
			
		||||
)
 | 
			
		||||
from ..aes import (
 | 
			
		||||
    aes_cbc_decrypt,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CrunchyrollBaseIE(InfoExtractor):
 | 
			
		||||
class CrunchyrollBaseIE(VRVIE):
 | 
			
		||||
    _LOGIN_URL = 'https://www.crunchyroll.com/login'
 | 
			
		||||
    _LOGIN_FORM = 'login_form'
 | 
			
		||||
    _NETRC_MACHINE = 'crunchyroll'
 | 
			
		||||
@@ -148,7 +149,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
 | 
			
		||||
            'description': 'md5:2d17137920c64f2f49981a7797d275ef',
 | 
			
		||||
            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
 | 
			
		||||
            'thumbnail': r're:^https?://.*\.jpg$',
 | 
			
		||||
            'uploader': 'Yomiuri Telecasting Corporation (YTV)',
 | 
			
		||||
            'upload_date': '20131013',
 | 
			
		||||
            'url': 're:(?!.*&)',
 | 
			
		||||
@@ -221,7 +222,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': '535080',
 | 
			
		||||
            'ext': 'mp4',
 | 
			
		||||
            'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
 | 
			
		||||
            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
 | 
			
		||||
            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
 | 
			
		||||
            'uploader': 'Marvelous AQL Inc.',
 | 
			
		||||
            'upload_date': '20091021',
 | 
			
		||||
@@ -437,13 +438,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
        if 'To view this, please log in to verify you are 18 or older.' in webpage:
 | 
			
		||||
            self.raise_login_required()
 | 
			
		||||
 | 
			
		||||
        media = self._parse_json(self._search_regex(
 | 
			
		||||
            r'vilos\.config\.media\s*=\s*({.+?});',
 | 
			
		||||
            webpage, 'vilos media', default='{}'), video_id)
 | 
			
		||||
        media_metadata = media.get('metadata') or {}
 | 
			
		||||
 | 
			
		||||
        video_title = self._html_search_regex(
 | 
			
		||||
            r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
 | 
			
		||||
            webpage, 'video_title')
 | 
			
		||||
        video_title = re.sub(r' {2,}', ' ', video_title)
 | 
			
		||||
        video_description = self._parse_json(self._html_search_regex(
 | 
			
		||||
        video_description = (self._parse_json(self._html_search_regex(
 | 
			
		||||
            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
 | 
			
		||||
            webpage, 'description', default='{}'), video_id).get('description')
 | 
			
		||||
            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
 | 
			
		||||
        if video_description:
 | 
			
		||||
            video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
 | 
			
		||||
        video_upload_date = self._html_search_regex(
 | 
			
		||||
@@ -456,6 +462,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
 | 
			
		||||
            webpage, 'video_uploader', fatal=False)
 | 
			
		||||
 | 
			
		||||
        formats = []
 | 
			
		||||
        for stream in media.get('streams', []):
 | 
			
		||||
            formats.extend(self._extract_vrv_formats(
 | 
			
		||||
                stream.get('url'), video_id, stream.get('format'),
 | 
			
		||||
                stream.get('audio_lang'), stream.get('hardsub_lang')))
 | 
			
		||||
        if not formats:
 | 
			
		||||
            available_fmts = []
 | 
			
		||||
            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
 | 
			
		||||
                attrs = extract_attributes(a)
 | 
			
		||||
@@ -468,8 +480,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
                    available_fmts = re.findall(p, webpage)
 | 
			
		||||
                    if available_fmts:
 | 
			
		||||
                        break
 | 
			
		||||
            if not available_fmts:
 | 
			
		||||
                available_fmts = self._FORMAT_IDS.keys()
 | 
			
		||||
            video_encode_ids = []
 | 
			
		||||
        formats = []
 | 
			
		||||
 | 
			
		||||
            for fmt in available_fmts:
 | 
			
		||||
                stream_quality, stream_format = self._FORMAT_IDS[fmt]
 | 
			
		||||
                video_format = fmt + 'p'
 | 
			
		||||
@@ -549,6 +563,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
                'media_id': video_id,
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
        for subtitle in media.get('subtitles', []):
 | 
			
		||||
            subtitle_url = subtitle.get('url')
 | 
			
		||||
            if not subtitle_url:
 | 
			
		||||
                continue
 | 
			
		||||
            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
 | 
			
		||||
                'url': subtitle_url,
 | 
			
		||||
                'ext': subtitle.get('format', 'ass'),
 | 
			
		||||
            })
 | 
			
		||||
        if not subtitles:
 | 
			
		||||
            subtitles = self.extract_subtitles(video_id, webpage)
 | 
			
		||||
 | 
			
		||||
        # webpage provides more accurate data than series_title from XML
 | 
			
		||||
@@ -557,8 +581,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
            webpage, 'series', fatal=False)
 | 
			
		||||
        season = xpath_text(metadata, 'series_title')
 | 
			
		||||
 | 
			
		||||
        episode = xpath_text(metadata, 'episode_title')
 | 
			
		||||
        episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
 | 
			
		||||
        episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
 | 
			
		||||
        episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
 | 
			
		||||
 | 
			
		||||
        season_number = int_or_none(self._search_regex(
 | 
			
		||||
            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
 | 
			
		||||
@@ -568,7 +592,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
            'description': video_description,
 | 
			
		||||
            'thumbnail': xpath_text(metadata, 'episode_image_url'),
 | 
			
		||||
            'duration': float_or_none(media_metadata.get('duration'), 1000),
 | 
			
		||||
            'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
            'upload_date': video_upload_date,
 | 
			
		||||
            'series': series,
 | 
			
		||||
 
 | 
			
		||||
@@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):
 | 
			
		||||
class VRVIE(VRVBaseIE):
 | 
			
		||||
    IE_NAME = 'vrv'
 | 
			
		||||
    _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
 | 
			
		||||
    _TEST = {
 | 
			
		||||
    _TESTS = [{
 | 
			
		||||
        'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'GR9PNZ396',
 | 
			
		||||
@@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE):
 | 
			
		||||
            # m3u8 download
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
        },
 | 
			
		||||
    }
 | 
			
		||||
    }]
 | 
			
		||||
 | 
			
		||||
    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
 | 
			
		||||
        if not url or stream_format not in ('hls', 'dash'):
 | 
			
		||||
            return []
 | 
			
		||||
        stream_id = hardsub_lang or audio_lang
 | 
			
		||||
        format_id = '%s-%s' % (stream_format, stream_id)
 | 
			
		||||
        if stream_format == 'hls':
 | 
			
		||||
            adaptive_formats = self._extract_m3u8_formats(
 | 
			
		||||
                url, video_id, 'mp4', m3u8_id=format_id,
 | 
			
		||||
                note='Downloading %s m3u8 information' % stream_id,
 | 
			
		||||
                fatal=False)
 | 
			
		||||
        elif stream_format == 'dash':
 | 
			
		||||
            adaptive_formats = self._extract_mpd_formats(
 | 
			
		||||
                url, video_id, mpd_id=format_id,
 | 
			
		||||
                note='Downloading %s MPD information' % stream_id,
 | 
			
		||||
                fatal=False)
 | 
			
		||||
        if audio_lang:
 | 
			
		||||
            for f in adaptive_formats:
 | 
			
		||||
                if f.get('acodec') != 'none':
 | 
			
		||||
                    f['language'] = audio_lang
 | 
			
		||||
        return adaptive_formats
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        video_id = self._match_id(url)
 | 
			
		||||
@@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE):
 | 
			
		||||
        for stream_type, streams in streams_json.get('streams', {}).items():
 | 
			
		||||
            if stream_type in ('adaptive_hls', 'adaptive_dash'):
 | 
			
		||||
                for stream in streams.values():
 | 
			
		||||
                    stream_url = stream.get('url')
 | 
			
		||||
                    if not stream_url:
 | 
			
		||||
                        continue
 | 
			
		||||
                    stream_id = stream.get('hardsub_locale') or audio_locale
 | 
			
		||||
                    format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
 | 
			
		||||
                    if stream_type == 'adaptive_hls':
 | 
			
		||||
                        adaptive_formats = self._extract_m3u8_formats(
 | 
			
		||||
                            stream_url, video_id, 'mp4', m3u8_id=format_id,
 | 
			
		||||
                            note='Downloading %s m3u8 information' % stream_id,
 | 
			
		||||
                            fatal=False)
 | 
			
		||||
                    else:
 | 
			
		||||
                        adaptive_formats = self._extract_mpd_formats(
 | 
			
		||||
                            stream_url, video_id, mpd_id=format_id,
 | 
			
		||||
                            note='Downloading %s MPD information' % stream_id,
 | 
			
		||||
                            fatal=False)
 | 
			
		||||
                    if audio_locale:
 | 
			
		||||
                        for f in adaptive_formats:
 | 
			
		||||
                            if f.get('acodec') != 'none':
 | 
			
		||||
                                f['language'] = audio_locale
 | 
			
		||||
                    formats.extend(adaptive_formats)
 | 
			
		||||
                    formats.extend(self._extract_vrv_formats(
 | 
			
		||||
                        stream.get('url'), video_id, stream_type.split('_')[1],
 | 
			
		||||
                        audio_locale, stream.get('hardsub_locale')))
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        subtitles = {}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user