mirror of
				https://gitlab.com/ytdl-org/youtube-dl.git
				synced 2025-11-04 03:17:07 -05:00 
			
		
		
		
	[netzkino] Add new extractor (Fixes #4669)
This commit is contained in:
		@@ -110,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict):
 | 
			
		||||
        else:
 | 
			
		||||
            if isinstance(expected, compat_str) and expected.startswith('md5:'):
 | 
			
		||||
                got = 'md5:' + md5(got_dict.get(info_field))
 | 
			
		||||
            elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
 | 
			
		||||
                got = got_dict.get(info_field)
 | 
			
		||||
                self.assertTrue(
 | 
			
		||||
                    isinstance(got, list),
 | 
			
		||||
                    'Expected field %s to be a list, but it is of type %s' % (
 | 
			
		||||
                        info_field, type(got).__name__))
 | 
			
		||||
                expected_num = int(expected.partition(':')[2])
 | 
			
		||||
                assertGreaterEqual(
 | 
			
		||||
                    self, len(got), expected_num,
 | 
			
		||||
                    'Expected %d items in field %s, but only got %d' % (
 | 
			
		||||
                        expected_num, info_field, len(got)
 | 
			
		||||
                    )
 | 
			
		||||
                )
 | 
			
		||||
                continue
 | 
			
		||||
            else:
 | 
			
		||||
                got = got_dict.get(info_field)
 | 
			
		||||
            self.assertEqual(expected, got,
 | 
			
		||||
 
 | 
			
		||||
@@ -274,6 +274,7 @@ from .nbc import (
 | 
			
		||||
)
 | 
			
		||||
from .ndr import NDRIE
 | 
			
		||||
from .ndtv import NDTVIE
 | 
			
		||||
from .netzkino import NetzkinoIE
 | 
			
		||||
from .nerdcubed import NerdCubedFeedIE
 | 
			
		||||
from .newgrounds import NewgroundsIE
 | 
			
		||||
from .newstube import NewstubeIE
 | 
			
		||||
 
 | 
			
		||||
@@ -147,6 +147,17 @@ class InfoExtractor(object):
 | 
			
		||||
    like_count:     Number of positive ratings of the video
 | 
			
		||||
    dislike_count:  Number of negative ratings of the video
 | 
			
		||||
    comment_count:  Number of comments on the video
 | 
			
		||||
    comments:       A list of comments, each with one or more of the following
 | 
			
		||||
                    properties (all but one of text or html optional):
 | 
			
		||||
                        * "author" - human-readable name of the comment author
 | 
			
		||||
                        * "author_id" - user ID of the comment author
 | 
			
		||||
                        * "id" - Comment ID
 | 
			
		||||
                        * "html" - Comment as HTML
 | 
			
		||||
                        * "text" - Plain text of the comment
 | 
			
		||||
                        * "timestamp" - UNIX timestamp of comment
 | 
			
		||||
                        * "parent" - ID of the comment this one is replying to.
 | 
			
		||||
                                     Set to "root" to indicate that this is a
 | 
			
		||||
                                     comment to the original video.
 | 
			
		||||
    age_limit:      Age restriction for the video, as an integer (years)
 | 
			
		||||
    webpage_url:    The url to the video webpage, if given to youtube-dl it
 | 
			
		||||
                    should allow to get the same result again. (It will be set
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/netzkino.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,86 @@
 | 
			
		||||
# coding: utf-8
 | 
			
		||||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    parse_iso8601,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NetzkinoIE(InfoExtractor):
    """Information extractor for netzkino.de ``#!/<category>/<slug>`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
        'md5': '92a3f8b76f8d7220acce5377ea5d4873',
        'info_dict': {
            'id': 'rakete-zum-mond',
            'ext': 'mp4',
            'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
            'comments': 'mincount:3',
            'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
            'upload_date': '20120813',
            # Raw string: same value as before, without the invalid '\.'
            # escape sequence in a non-raw literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1344858571,
            'age_limit': 12,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the film addressed by ``url``.

        The category listing is fetched from the JSON API and searched for
        the post whose ``slug`` equals the video id taken from the URL.  The
        stream URL templates are read from the site's player JavaScript and
        combined with the film's ``Streaming`` custom field to build the
        format list.

        Raises ExtractorError when the category listing contains no post
        with the requested slug.
        """
        # Imported locally so this block does not depend on changes to the
        # module-level import list.
        from ..utils import ExtractorError

        mobj = re.match(self._VALID_URL, url)
        category_id = mobj.group('category')
        video_id = mobj.group('id')

        api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
        api_info = self._download_json(api_url, video_id)

        # Supply a default so a missing slug is reported as a proper
        # extractor error instead of leaking a bare StopIteration.
        info = next(
            (p for p in api_info['posts'] if p['slug'] == video_id), None)
        if info is None:
            raise ExtractorError(
                'Unable to find video %s in category %s' % (
                    video_id, category_id),
                expected=True)
        custom_fields = info['custom_fields']

        production_js = self._download_webpage(
            'http://www.netzkino.de/beta/dist/production.min.js', video_id,
            note='Downloading player code')
        avo_js = self._search_regex(
            r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
            production_js, 'URL templates')
        templates = self._parse_json(
            avo_js, video_id, transform_source=js_to_json)

        # Path suffix appended to the filled-in template, per template key.
        suffix = {
            'hds': '.mp4/manifest.f4m',
            'hls': '.mp4/master.m3u8',
            'pmd': '.mp4',
        }
        film_fn = custom_fields['Streaming'][0]
        # Template kinds without a known suffix are skipped rather than
        # aborting the whole extraction with a KeyError.
        formats = [{
            'format_id': key,
            'ext': 'mp4',
            'url': tpl.replace('{}', film_fn) + suffix[key],
        } for key, tpl in templates.items() if key in suffix]
        self._sort_formats(formats)

        comments = [{
            'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
            'id': c['id'],
            'author': c['name'],
            'html': c['content'],
            # A parent of 0 (or no parent at all) marks a top-level comment.
            'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
        } for c in info.get('comments', [])]

        # 'FSK' may be missing or empty; the age limit is optional metadata,
        # so fall back to None instead of subscripting None.
        fsk_values = custom_fields.get('FSK') or [None]

        return {
            'id': video_id,
            'formats': formats,
            'comments': comments,
            'title': info['title'],
            'age_limit': int_or_none(fsk_values[0]),
            'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
            'description': clean_html(info.get('content')),
            'thumbnail': info.get('thumbnail'),
            'playlist_title': api_info.get('title'),
            'playlist_id': category_id,
        }
 | 
			
		||||
@@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html):
 | 
			
		||||
 | 
			
		||||
def clean_html(html):
 | 
			
		||||
    """Clean an HTML snippet into a readable string"""
 | 
			
		||||
 | 
			
		||||
    if html is None:  # Convenience for sanitizing descriptions etc.
 | 
			
		||||
        return html
 | 
			
		||||
 | 
			
		||||
    # Newline vs <br />
 | 
			
		||||
    html = html.replace('\n', ' ')
 | 
			
		||||
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user